| /* |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.android.server; |
| |
| import static android.service.watchdog.ExplicitHealthCheckService.PackageConfig; |
| |
| import static java.lang.annotation.RetentionPolicy.SOURCE; |
| |
| import android.annotation.IntDef; |
| import android.annotation.Nullable; |
| import android.content.Context; |
| import android.content.pm.PackageManager; |
| import android.content.pm.VersionedPackage; |
| import android.net.NetworkStackClient; |
| import android.os.Environment; |
| import android.os.Handler; |
| import android.os.Looper; |
| import android.os.SystemClock; |
| import android.provider.DeviceConfig; |
| import android.text.TextUtils; |
| import android.util.ArrayMap; |
| import android.util.ArraySet; |
| import android.util.AtomicFile; |
| import android.util.Slog; |
| import android.util.Xml; |
| |
| import com.android.internal.annotations.GuardedBy; |
| import com.android.internal.annotations.VisibleForTesting; |
| import com.android.internal.os.BackgroundThread; |
| import com.android.internal.util.FastXmlSerializer; |
| import com.android.internal.util.XmlUtils; |
| |
| import libcore.io.IoUtils; |
| |
| import org.xmlpull.v1.XmlPullParser; |
| import org.xmlpull.v1.XmlPullParserException; |
| import org.xmlpull.v1.XmlSerializer; |
| |
| import java.io.File; |
| import java.io.FileNotFoundException; |
| import java.io.FileOutputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.lang.annotation.Retention; |
| import java.nio.charset.StandardCharsets; |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.concurrent.TimeUnit; |
| |
| /** |
| * Monitors the health of packages on the system and notifies interested observers when packages |
| * fail. On failure, the registered observer with the least user impacting mitigation will |
| * be notified. |
| */ |
| public class PackageWatchdog { |
| private static final String TAG = "PackageWatchdog"; |
| |
| static final String PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS = |
| "watchdog_trigger_failure_duration_millis"; |
| static final String PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT = |
| "watchdog_trigger_failure_count"; |
| static final String PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED = |
| "watchdog_explicit_health_check_enabled"; |
| |
| // Default duration, in millis, over which package failures are counted before resetting to 0 |
| private static final int DEFAULT_TRIGGER_FAILURE_DURATION_MS = |
| (int) TimeUnit.MINUTES.toMillis(1); |
| // Default number of package failures within the duration above before we notify observers |
| private static final int DEFAULT_TRIGGER_FAILURE_COUNT = 5; |
| // Whether explicit health checks are enabled by default |
| private static final boolean DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED = true; |
| |
| private static final int DB_VERSION = 1; |
| private static final String TAG_PACKAGE_WATCHDOG = "package-watchdog"; |
| private static final String TAG_PACKAGE = "package"; |
| private static final String TAG_OBSERVER = "observer"; |
| private static final String ATTR_VERSION = "version"; |
| private static final String ATTR_NAME = "name"; |
| private static final String ATTR_DURATION = "duration"; |
| private static final String ATTR_EXPLICIT_HEALTH_CHECK_DURATION = "health-check-duration"; |
| private static final String ATTR_PASSED_HEALTH_CHECK = "passed-health-check"; |
| |
| @GuardedBy("PackageWatchdog.class") |
| private static PackageWatchdog sPackageWatchdog; |
| |
| private final Object mLock = new Object(); |
| // System server context |
| private final Context mContext; |
| // Handler to run short running tasks |
| private final Handler mShortTaskHandler; |
| // Handler for processing IO and long running tasks |
| private final Handler mLongTaskHandler; |
| // Contains (observer-name -> observer-handle) that have ever been registered from |
| // previous boots. Observers with all packages expired are periodically pruned. |
| // It is saved to disk on system shutdown and repopulated on startup so it survives reboots. |
| @GuardedBy("mLock") |
| private final ArrayMap<String, ObserverInternal> mAllObservers = new ArrayMap<>(); |
| // File containing the XML data of monitored packages /data/system/package-watchdog.xml |
| private final AtomicFile mPolicyFile; |
| private final ExplicitHealthCheckController mHealthCheckController; |
| private final NetworkStackClient mNetworkStackClient; |
| @GuardedBy("mLock") |
| private boolean mIsPackagesReady; |
| // Flag to control whether explicit health checks are supported or not |
| @GuardedBy("mLock") |
| private boolean mIsHealthCheckEnabled = DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED; |
| @GuardedBy("mLock") |
| private int mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS; |
| @GuardedBy("mLock") |
| private int mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT; |
| // SystemClock#uptimeMillis when the next state sync was last scheduled; |
| // 0 if no state sync is currently scheduled. |
| @GuardedBy("mLock") |
| private long mUptimeAtLastStateSync; |
| |
| private PackageWatchdog(Context context) { |
| // this(...) must be the first statement, so the default dependencies are built inline |
| this(context, new AtomicFile( |
| new File(new File(Environment.getDataDirectory(), "system"), |
| "package-watchdog.xml")), |
| new Handler(Looper.myLooper()), BackgroundThread.getHandler(), |
| new ExplicitHealthCheckController(context), |
| NetworkStackClient.getInstance()); |
| } |
| |
| /** |
| * Builds a PackageWatchdog from explicitly supplied dependencies so that tests can inject |
| * their own, then loads any previously persisted observers from {@code policyFile}. |
| */ |
| @VisibleForTesting |
| PackageWatchdog(Context context, AtomicFile policyFile, Handler shortTaskHandler, |
| Handler longTaskHandler, ExplicitHealthCheckController controller, |
| NetworkStackClient networkStackClient) { |
| this.mContext = context; |
| this.mHealthCheckController = controller; |
| this.mNetworkStackClient = networkStackClient; |
| this.mShortTaskHandler = shortTaskHandler; |
| this.mLongTaskHandler = longTaskHandler; |
| // The policy file has to be assigned before loadFromFile() reads from it. |
| this.mPolicyFile = policyFile; |
| loadFromFile(); |
| } |
| |
| /** Creates or gets singleton instance of PackageWatchdog. */ |
| public static PackageWatchdog getInstance(Context context) { |
| synchronized (PackageWatchdog.class) { |
| if (sPackageWatchdog == null) { |
| sPackageWatchdog = new PackageWatchdog(context); |
| } |
| return sPackageWatchdog; |
| } |
| } |
| |
| /** |
| * Called during boot once packages are ready on the device. |
| * |
| * <p>Marks packages as ready, hooks this watchdog's callbacks into the health check |
| * controller, starts listening for config changes, applies the current config and registers |
| * the network stack health listener. |
| */ |
| public void onPackagesReady() { |
| synchronized (mLock) { |
| mIsPackagesReady = true; |
| mHealthCheckController.setCallbacks( |
| this::onHealthCheckPassed, |
| this::onSupportedPackages, |
| this::syncRequestsAsync); |
| setPropertyChangedListenerLocked(); |
| updateConfigs(); |
| registerNetworkStackHealthListener(); |
| } |
| } |
| |
| /** |
| * Attaches {@code observer} to the internal record stored under its name, if there is one. |
| * |
| * <p>Observers are expected to call this on boot. No packages are specified here; the |
| * observer simply resumes receiving notifications for packages requested in a previous boot. |
| * If nothing is stored under the observer's name this call has no effect. |
| */ |
| public void registerHealthObserver(PackageHealthObserver observer) { |
| synchronized (mLock) { |
| final ObserverInternal known = mAllObservers.get(observer.getName()); |
| if (known == null) { |
| return; |
| } |
| known.mRegisteredObserver = observer; |
| } |
| } |
| |
| /** |
| * Starts observing the health of the {@code packages} for {@code observer} and notifies |
| * {@code observer} of any package failures within the monitoring duration. |
| * |
| * <p>If monitoring a package supporting explicit health check, at the end of the monitoring |
| * duration if {@link #onHealthCheckPassed} was never called, |
| * {@link PackageHealthObserver#execute} will be called as if the package failed. |
| * |
| * <p>If {@code observer} is already monitoring a package in {@code packageNames}, |
| * the entry for that package is replaced: its monitoring window is reset to |
| * {@code durationMs} and its health check state starts over. |
| * |
| * <p>If {@code packageNames} is empty, the problem is logged and nothing is observed; no |
| * exception is thrown in that case. |
| * |
| * @throws IllegalArgumentException if {@code durationMs} is less than 1 |
| */ |
| public void startObservingHealth(PackageHealthObserver observer, List<String> packageNames, |
| long durationMs) { |
| if (packageNames.isEmpty()) { |
| Slog.wtf(TAG, "No packages to observe, " + observer.getName()); |
| return; |
| } |
| if (durationMs < 1) { |
| // TODO: Instead of failing, monitor for default? 48hrs? |
| throw new IllegalArgumentException("Invalid duration " + durationMs + "ms for observer " |
| + observer.getName() + ". Not observing packages " + packageNames); |
| } |
| |
| List<MonitoredPackage> packages = new ArrayList<>(); |
| for (int i = 0; i < packageNames.size(); i++) { |
| // Health checks not available yet so health check state will start INACTIVE |
| packages.add(new MonitoredPackage(packageNames.get(i), durationMs, false)); |
| } |
| |
| // Sync before we add the new packages to the observers. This will #pruneObservers, |
| // causing any elapsed time to be deducted from all existing packages before we add new |
| // packages. This maintains the invariant that the elapsed time for ALL (new and existing) |
| // packages is the same. |
| syncState("observing new packages"); |
| |
| synchronized (mLock) { |
| ObserverInternal oldObserver = mAllObservers.get(observer.getName()); |
| if (oldObserver == null) { |
| Slog.d(TAG, observer.getName() + " started monitoring health " |
| + "of packages " + packageNames); |
| mAllObservers.put(observer.getName(), |
| new ObserverInternal(observer.getName(), packages)); |
| } else { |
| Slog.d(TAG, observer.getName() + " added the following " |
| + "packages to monitor " + packageNames); |
| oldObserver.updatePackagesLocked(packages); |
| } |
| } |
| |
| // Register observer in case not already registered |
| registerHealthObserver(observer); |
| |
| // Sync after we add the new packages to the observers. We may have received packages |
| // requiring an earlier schedule than we are currently scheduled for. |
| syncState("updated observers"); |
| } |
| |
| /** |
| * Drops {@code observer} so it no longer receives package failure notifications. |
| * |
| * <p>The record stored under the observer's name is removed entirely, so packages it was |
| * observing, including those carried over from a previous boot, stop being observed. A state |
| * sync follows the removal. |
| */ |
| public void unregisterHealthObserver(PackageHealthObserver observer) { |
| synchronized (mLock) { |
| mAllObservers.remove(observer.getName()); |
| } |
| final String syncReason = "unregistering observer: " + observer.getName(); |
| syncState(syncReason); |
| } |
| |
| /** |
| * Returns the names of the packages observed by {@code observer}. |
| * |
| * <p>The returned set is a snapshot taken while holding the watchdog lock; it is not backed |
| * by the watchdog's internal state and can be read safely from any thread. |
| * |
| * @return the observed package names if {@code observer} is the instance registered under |
| * its name in the current boot; an empty set if packages are stored under the observer's |
| * name (for example from a previous boot) but {@code observer} has not registered itself in |
| * the current boot; {@code null} if nothing is stored under the observer's name. |
| */ |
| @Nullable |
| public Set<String> getPackages(PackageHealthObserver observer) { |
| synchronized (mLock) { |
| for (int i = 0; i < mAllObservers.size(); i++) { |
| if (observer.getName().equals(mAllObservers.keyAt(i))) { |
| if (observer.equals(mAllObservers.valueAt(i).mRegisteredObserver)) { |
| // Copy under the lock: keySet() is a live view of state guarded by mLock |
| // and must not escape to callers that read it without the lock. |
| return new ArraySet<>(mAllObservers.valueAt(i).mPackages.keySet()); |
| } |
| return Collections.emptySet(); |
| } |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Reports that a process containing {@code packages} failed, due to a crash or an ANR. |
| * |
| * <p>The work is posted to the long task handler. For each failed package, every observer |
| * registered in this boot whose failure threshold for that package is exceeded is asked for |
| * the user impact of its mitigation. The observer reporting the lowest impact other than |
| * {@link PackageHealthObserverImpact#USER_IMPACT_NONE} is then asked to execute it. |
| * |
| * <p>This method could be called frequently if there is a severe problem on the device. |
| */ |
| public void onPackageFailure(List<VersionedPackage> packages) { |
| mLongTaskHandler.post(() -> { |
| synchronized (mLock) { |
| if (mAllObservers.isEmpty()) { |
| return; |
| } |
| |
| final int packageCount = packages.size(); |
| for (int p = 0; p < packageCount; p++) { |
| final VersionedPackage failedPackage = packages.get(p); |
| // Best candidate found so far for this package |
| PackageHealthObserver leastImpactObserver = null; |
| int leastImpact = Integer.MAX_VALUE; |
| |
| for (int o = 0; o < mAllObservers.size(); o++) { |
| final ObserverInternal candidate = mAllObservers.valueAt(o); |
| final PackageHealthObserver registered = candidate.mRegisteredObserver; |
| if (registered == null) { |
| // Not registered in this boot, cannot be notified |
| continue; |
| } |
| if (!candidate.onPackageFailureLocked(failedPackage.getPackageName())) { |
| continue; |
| } |
| final int impact = registered.onHealthCheckFailed(failedPackage); |
| final boolean hasAction = |
| impact != PackageHealthObserverImpact.USER_IMPACT_NONE; |
| if (hasAction && impact < leastImpact) { |
| leastImpactObserver = registered; |
| leastImpact = impact; |
| } |
| } |
| |
| // Only the least user impacting mitigation is executed |
| if (leastImpactObserver != null) { |
| leastImpactObserver.execute(failedPackage); |
| } |
| } |
| } |
| }); |
| } |
| |
| // TODO(b/120598832): Optimize write? Maybe only write a separate smaller file? Also |
| // avoid holding lock? |
| // This currently adds about 7ms extra to shutdown thread |
| /** |
| * Writes the package information to file during shutdown. |
| * |
| * <p>If any observers exist, durations are first brought up to date by pruning and the |
| * state is then written synchronously on the calling thread. |
| * |
| * <p>NOTE(review): the removeCallbacks call below passes a newly evaluated method reference |
| * to saveToFileAsync, which is never the object posted to the handler; it looks like the |
| * intent was to cancel a pending asynchronous saveToFile -- confirm and fix alongside |
| * saveToFileAsync. |
| */ |
| public void writeNow() { |
| synchronized (mLock) { |
| // Must only run synchronous tasks as this runs on the ShutdownThread and no other |
| // thread is guaranteed to run during shutdown. |
| if (!mAllObservers.isEmpty()) { |
| mLongTaskHandler.removeCallbacks(this::saveToFileAsync); |
| pruneObserversLocked(); |
| saveToFile(); |
| Slog.i(TAG, "Last write to update package durations"); |
| } |
| } |
| } |
| |
| /** |
| * Enables or disables explicit health checks. |
| * <p> If explicit health checks are enabled, the health check service is started. |
| * <p> If explicit health checks are disabled, pending explicit health check requests are |
| * passed and the health check service is stopped. |
| */ |
| private void setExplicitHealthCheckEnabled(boolean enabled) { |
| synchronized (mLock) { |
| mIsHealthCheckEnabled = enabled; |
| mHealthCheckController.setEnabled(enabled); |
| // Prune to update internal state whenever health check is enabled/disabled |
| syncState("health check state " + (enabled ? "enabled" : "disabled")); |
| } |
| } |
| |
| /** |
| * Possible severity values of the user impact of a {@link PackageHealthObserver#execute}. |
| * Lower values mean less user impact. |
| */ |
| @Retention(SOURCE) |
| @IntDef(value = {PackageHealthObserverImpact.USER_IMPACT_NONE, |
| PackageHealthObserverImpact.USER_IMPACT_LOW, |
| PackageHealthObserverImpact.USER_IMPACT_MEDIUM, |
| PackageHealthObserverImpact.USER_IMPACT_HIGH}) |
| public @interface PackageHealthObserverImpact { |
| /** No action to take. */ |
| int USER_IMPACT_NONE = 0; |
| /** Action has low user impact, user of a device will barely notice. */ |
| int USER_IMPACT_LOW = 1; |
| /** Action has medium user impact, user of a device will likely notice. */ |
| int USER_IMPACT_MEDIUM = 3; |
| /** Action has high user impact, a last resort, user of a device will be very frustrated. */ |
| int USER_IMPACT_HIGH = 5; |
| } |
| |
| /** Register instances of this interface to receive notifications on package failure. */ |
| public interface PackageHealthObserver { |
| /** |
| * Called when health check fails for the {@code versionedPackage}. |
| * |
| * <p>The returned impact is compared against that of other observers of the same package; |
| * the observer with the lowest impact other than |
| * {@link PackageHealthObserverImpact#USER_IMPACT_NONE} is the one asked to {@link #execute}. |
| * |
| * @return any one of {@link PackageHealthObserverImpact} to express the impact |
| * to the user on {@link #execute} |
| */ |
| @PackageHealthObserverImpact int onHealthCheckFailed(VersionedPackage versionedPackage); |
| |
| /** |
| * Executes mitigation for {@link #onHealthCheckFailed}. |
| * |
| * @return {@code true} if action was executed successfully, {@code false} otherwise |
| */ |
| boolean execute(VersionedPackage versionedPackage); |
| |
| // TODO(b/120598832): Ensure uniqueness? |
| /** |
| * Identifier for the observer, should not change across device updates otherwise the |
| * watchdog may drop observing packages with the old name. The watchdog stores and looks |
| * up its per-observer state under this name. |
| */ |
| String getName(); |
| } |
| |
| long getTriggerFailureCount() { |
| synchronized (mLock) { |
| return mTriggerFailureCount; |
| } |
| } |
| |
| /** |
| * Serializes and syncs health check requests with the {@link ExplicitHealthCheckController}. |
| */ |
| private void syncRequestsAsync() { |
| mShortTaskHandler.removeCallbacks(this::syncRequests); |
| mShortTaskHandler.post(this::syncRequests); |
| } |
| |
| /** |
| * Syncs health check requests with the {@link ExplicitHealthCheckController}. |
| * Calls to this must be serialized. |
| * |
| * @see #syncRequestsAsync |
| */ |
| private void syncRequests() { |
| Set<String> packages = null; |
| synchronized (mLock) { |
| if (mIsPackagesReady) { |
| packages = getPackagesPendingHealthChecksLocked(); |
| } // else, we will sync requests when packages become ready |
| } |
| |
| // Call outside lock to avoid holding lock when calling into the controller. |
| if (packages != null) { |
| Slog.i(TAG, "Syncing health check requests for packages: " + packages); |
| mHealthCheckController.syncRequests(packages); |
| } |
| } |
| |
| /** |
| * Updates the observers monitoring {@code packageName} that explicit health check has passed. |
| * |
| * <p> This update is strictly for registered observers at the time of the call |
| * Observers that register after this signal will have no knowledge of prior signals and will |
| * effectively behave as if the explicit health check hasn't passed for {@code packageName}. |
| * |
| * <p> {@code packageName} can still be considered failed if reported by |
| * {@link #onPackageFailureLocked} before the package expires. |
| * |
| * <p> Triggered by components outside the system server when they are fully functional after an |
| * update. |
| */ |
| private void onHealthCheckPassed(String packageName) { |
| Slog.i(TAG, "Health check passed for package: " + packageName); |
| boolean isStateChanged = false; |
| |
| synchronized (mLock) { |
| for (int observerIdx = 0; observerIdx < mAllObservers.size(); observerIdx++) { |
| ObserverInternal observer = mAllObservers.valueAt(observerIdx); |
| MonitoredPackage monitoredPackage = observer.mPackages.get(packageName); |
| |
| if (monitoredPackage != null) { |
| int oldState = monitoredPackage.getHealthCheckStateLocked(); |
| int newState = monitoredPackage.tryPassHealthCheckLocked(); |
| isStateChanged |= oldState != newState; |
| } |
| } |
| } |
| |
| if (isStateChanged) { |
| syncState("health check passed for " + packageName); |
| } |
| } |
| |
| private void onSupportedPackages(List<PackageConfig> supportedPackages) { |
| boolean isStateChanged = false; |
| |
| Map<String, Long> supportedPackageTimeouts = new ArrayMap<>(); |
| Iterator<PackageConfig> it = supportedPackages.iterator(); |
| while (it.hasNext()) { |
| PackageConfig info = it.next(); |
| supportedPackageTimeouts.put(info.getPackageName(), info.getHealthCheckTimeoutMillis()); |
| } |
| |
| synchronized (mLock) { |
| Slog.d(TAG, "Received supported packages " + supportedPackages); |
| Iterator<ObserverInternal> oit = mAllObservers.values().iterator(); |
| while (oit.hasNext()) { |
| Iterator<MonitoredPackage> pit = oit.next().mPackages.values().iterator(); |
| while (pit.hasNext()) { |
| MonitoredPackage monitoredPackage = pit.next(); |
| String packageName = monitoredPackage.getName(); |
| int oldState = monitoredPackage.getHealthCheckStateLocked(); |
| int newState; |
| |
| if (supportedPackageTimeouts.containsKey(packageName)) { |
| // Supported packages become ACTIVE if currently INACTIVE |
| newState = monitoredPackage.setHealthCheckActiveLocked( |
| supportedPackageTimeouts.get(packageName)); |
| } else { |
| // Unsupported packages are marked as PASSED unless already FAILED |
| newState = monitoredPackage.tryPassHealthCheckLocked(); |
| } |
| isStateChanged |= oldState != newState; |
| } |
| } |
| } |
| |
| if (isStateChanged) { |
| syncState("updated health check supported packages " + supportedPackages); |
| } |
| } |
| |
| @GuardedBy("mLock") |
| private Set<String> getPackagesPendingHealthChecksLocked() { |
| Slog.d(TAG, "Getting all observed packages pending health checks"); |
| Set<String> packages = new ArraySet<>(); |
| Iterator<ObserverInternal> oit = mAllObservers.values().iterator(); |
| while (oit.hasNext()) { |
| ObserverInternal observer = oit.next(); |
| Iterator<MonitoredPackage> pit = |
| observer.mPackages.values().iterator(); |
| while (pit.hasNext()) { |
| MonitoredPackage monitoredPackage = pit.next(); |
| String packageName = monitoredPackage.getName(); |
| if (monitoredPackage.isPendingHealthChecksLocked()) { |
| packages.add(packageName); |
| } |
| } |
| } |
| return packages; |
| } |
| |
| /** |
| * Syncs the state of the observers. |
| * |
| * <p> Prunes all observers, saves new state to disk, syncs health check requests with the |
| * health check service and schedules the next state sync. All of this happens while holding |
| * the watchdog lock; the save and the request sync are only queued here, not run inline. |
| * |
| * @param reason short description of what triggered the sync, used for logging only |
| */ |
| private void syncState(String reason) { |
| synchronized (mLock) { |
| Slog.i(TAG, "Syncing state, reason: " + reason); |
| pruneObserversLocked(); |
| |
| saveToFileAsync(); |
| syncRequestsAsync(); |
| |
| // Done syncing state, schedule the next state sync |
| scheduleNextSyncStateLocked(); |
| } |
| } |
| |
| private void syncStateWithScheduledReason() { |
| syncState("scheduled"); |
| } |
| |
| @GuardedBy("mLock") |
| private void scheduleNextSyncStateLocked() { |
| long durationMs = getNextStateSyncMillisLocked(); |
| mShortTaskHandler.removeCallbacks(this::syncStateWithScheduledReason); |
| if (durationMs == Long.MAX_VALUE) { |
| Slog.i(TAG, "Cancelling state sync, nothing to sync"); |
| mUptimeAtLastStateSync = 0; |
| } else { |
| Slog.i(TAG, "Scheduling next state sync in " + durationMs + "ms"); |
| mUptimeAtLastStateSync = SystemClock.uptimeMillis(); |
| mShortTaskHandler.postDelayed(this::syncStateWithScheduledReason, durationMs); |
| } |
| } |
| |
| /** |
| * Computes how long, in millis, to wait before the next watchdog state sync: the smallest |
| * schedule duration reported by any monitored package of any observer. |
| * |
| * @returns Long#MAX_VALUE if there are no observed packages. |
| */ |
| @GuardedBy("mLock") |
| private long getNextStateSyncMillisLocked() { |
| long soonestMs = Long.MAX_VALUE; |
| for (ObserverInternal observer : mAllObservers.values()) { |
| for (MonitoredPackage monitored : observer.mPackages.values()) { |
| soonestMs = Math.min(soonestMs, monitored.getShortestScheduleDurationMsLocked()); |
| } |
| } |
| return soonestMs; |
| } |
| |
| /** |
| * Removes {@code elapsedMs} milliseconds from all durations on monitored packages |
| * and updates other internal state. |
| */ |
| @GuardedBy("mLock") |
| private void pruneObserversLocked() { |
| long elapsedMs = mUptimeAtLastStateSync == 0 |
| ? 0 : SystemClock.uptimeMillis() - mUptimeAtLastStateSync; |
| if (elapsedMs <= 0) { |
| Slog.i(TAG, "Not pruning observers, elapsed time: " + elapsedMs + "ms"); |
| return; |
| } |
| |
| Slog.i(TAG, "Removing " + elapsedMs + "ms from all packages on all observers"); |
| Iterator<ObserverInternal> it = mAllObservers.values().iterator(); |
| while (it.hasNext()) { |
| ObserverInternal observer = it.next(); |
| Set<MonitoredPackage> failedPackages = |
| observer.prunePackagesLocked(elapsedMs); |
| if (!failedPackages.isEmpty()) { |
| onHealthCheckFailed(observer, failedPackages); |
| } |
| if (observer.mPackages.isEmpty()) { |
| Slog.i(TAG, "Discarding observer " + observer.mName + ". All packages expired"); |
| it.remove(); |
| } |
| } |
| } |
| |
| private void onHealthCheckFailed(ObserverInternal observer, |
| Set<MonitoredPackage> failedPackages) { |
| mLongTaskHandler.post(() -> { |
| synchronized (mLock) { |
| PackageHealthObserver registeredObserver = observer.mRegisteredObserver; |
| if (registeredObserver != null) { |
| Iterator<MonitoredPackage> it = failedPackages.iterator(); |
| while (it.hasNext()) { |
| String failedPackage = it.next().getName(); |
| Slog.i(TAG, "Explicit health check failed for package " + failedPackage); |
| VersionedPackage versionedPkg = getVersionedPackage(failedPackage); |
| if (versionedPkg == null) { |
| Slog.w(TAG, "Explicit health check failed but could not find package " |
| + failedPackage); |
| // TODO(b/120598832): Skip. We only continue to pass tests for now since |
| // the tests don't install any packages |
| versionedPkg = new VersionedPackage(failedPackage, 0L); |
| } |
| registeredObserver.execute(versionedPkg); |
| } |
| } |
| } |
| }); |
| } |
| |
| /** |
| * Looks up the long version code of {@code packageName} and pairs it with the name. |
| * |
| * @return the versioned package, or {@code null} if no package manager is available or the |
| * package cannot be found |
| */ |
| @Nullable |
| private VersionedPackage getVersionedPackage(String packageName) { |
| final PackageManager packageManager = mContext.getPackageManager(); |
| if (packageManager != null) { |
| try { |
| return new VersionedPackage(packageName, |
| packageManager.getPackageInfo(packageName, 0 /* flags */) |
| .getLongVersionCode()); |
| } catch (PackageManager.NameNotFoundException e) { |
| // Unknown package, reported as null below |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Replaces the contents of mAllObservers with the observers stored in the policy file. |
| * |
| * <p>A missing file simply leaves nothing to monitor. If the file cannot be read or parsed, |
| * the problem is logged and the file is deleted. |
| * |
| * <p>Note that this is <b>not</b> thread safe and should only be called |
| * from the constructor. |
| */ |
| private void loadFromFile() { |
| mAllObservers.clear(); |
| InputStream in = null; |
| try { |
| in = mPolicyFile.openRead(); |
| final XmlPullParser xml = Xml.newPullParser(); |
| xml.setInput(in, StandardCharsets.UTF_8.name()); |
| XmlUtils.beginDocument(xml, TAG_PACKAGE_WATCHDOG); |
| final int rootDepth = xml.getDepth(); |
| while (XmlUtils.nextElementWithin(xml, rootDepth)) { |
| final ObserverInternal restored = ObserverInternal.read(xml, this); |
| if (restored == null) { |
| continue; |
| } |
| mAllObservers.put(restored.mName, restored); |
| } |
| } catch (FileNotFoundException e) { |
| // Nothing to monitor |
| } catch (IOException | NumberFormatException | XmlPullParserException e) { |
| Slog.wtf(TAG, "Unable to read monitored packages, deleting file", e); |
| mPolicyFile.delete(); |
| } finally { |
| IoUtils.closeQuietly(in); |
| } |
| } |
| |
| /** |
| * Adds a {@link DeviceConfig#OnPropertiesChangedListener} for the rollback namespace that |
| * reloads the watchdog configuration whenever properties in that namespace change. The |
| * listener runs on the context's main executor. |
| */ |
| private void setPropertyChangedListenerLocked() { |
| DeviceConfig.addOnPropertiesChangedListener( |
| DeviceConfig.NAMESPACE_ROLLBACK, |
| mContext.getMainExecutor(), |
| (properties) -> { |
| if (DeviceConfig.NAMESPACE_ROLLBACK.equals(properties.getNamespace())) { |
| updateConfigs(); |
| } |
| }); |
| } |
| |
| /** |
| * Reloads the watchdog configuration from {@link DeviceConfig}. |
| * |
| * <p>Reads the trigger failure count, the trigger failure duration and the explicit health |
| * check flag from the rollback namespace. A non-positive count or duration is replaced with |
| * its own default. Health check is then enabled or disabled according to the flag. |
| */ |
| private void updateConfigs() { |
| synchronized (mLock) { |
| mTriggerFailureCount = DeviceConfig.getInt( |
| DeviceConfig.NAMESPACE_ROLLBACK, |
| PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT, |
| DEFAULT_TRIGGER_FAILURE_COUNT); |
| if (mTriggerFailureCount <= 0) { |
| mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT; |
| } |
| |
| mTriggerFailureDurationMs = DeviceConfig.getInt( |
| DeviceConfig.NAMESPACE_ROLLBACK, |
| PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS, |
| DEFAULT_TRIGGER_FAILURE_DURATION_MS); |
| if (mTriggerFailureDurationMs <= 0) { |
| // Fall back to the default duration, not the default failure count |
| mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS; |
| } |
| |
| setExplicitHealthCheckEnabled(DeviceConfig.getBoolean( |
| DeviceConfig.NAMESPACE_ROLLBACK, |
| PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED, |
| DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED)); |
| } |
| } |
| |
| private void registerNetworkStackHealthListener() { |
| // TODO: have an internal method to trigger a rollback by reporting high severity errors, |
| // and rely on ActivityManager to inform the watchdog of severe network stack crashes |
| // instead of having this listener in parallel. |
| mNetworkStackClient.registerHealthListener( |
| packageName -> { |
| final VersionedPackage pkg = getVersionedPackage(packageName); |
| if (pkg == null) { |
| Slog.wtf(TAG, "NetworkStack failed but could not find its package"); |
| return; |
| } |
| // This is a severe failure and recovery should be attempted immediately. |
| // TODO: have a better way to handle such failures. |
| final List<VersionedPackage> pkgList = Collections.singletonList(pkg); |
| final long failureCount = getTriggerFailureCount(); |
| for (int i = 0; i < failureCount; i++) { |
| onPackageFailure(pkgList); |
| } |
| }); |
| } |
| |
| /** |
| * Persists mAllObservers to file. Threshold information is ignored. |
| */ |
| private boolean saveToFile() { |
| Slog.i(TAG, "Saving observer state to file"); |
| synchronized (mLock) { |
| FileOutputStream stream; |
| try { |
| stream = mPolicyFile.startWrite(); |
| } catch (IOException e) { |
| Slog.w(TAG, "Cannot update monitored packages", e); |
| return false; |
| } |
| |
| try { |
| XmlSerializer out = new FastXmlSerializer(); |
| out.setOutput(stream, StandardCharsets.UTF_8.name()); |
| out.startDocument(null, true); |
| out.startTag(null, TAG_PACKAGE_WATCHDOG); |
| out.attribute(null, ATTR_VERSION, Integer.toString(DB_VERSION)); |
| for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) { |
| mAllObservers.valueAt(oIndex).writeLocked(out); |
| } |
| out.endTag(null, TAG_PACKAGE_WATCHDOG); |
| out.endDocument(); |
| mPolicyFile.finishWrite(stream); |
| return true; |
| } catch (IOException e) { |
| Slog.w(TAG, "Failed to save monitored packages, restoring backup", e); |
| mPolicyFile.failWrite(stream); |
| return false; |
| } finally { |
| IoUtils.closeQuietly(stream); |
| } |
| } |
| } |
| |
| private void saveToFileAsync() { |
| if (!mLongTaskHandler.hasCallbacks(this::saveToFile)) { |
| mLongTaskHandler.post(this::saveToFile); |
| } |
| } |
| |
| /** |
| * Represents an observer monitoring a set of packages along with the failure thresholds for |
| * each package. |
| * |
| * <p> Note, the PackageWatchdog#mLock must always be held when reading or writing |
| * instances of this class. |
| */ |
| //TODO(b/120598832): Remove 'm' from non-private fields |
| private static class ObserverInternal { |
| // Observer name; also the key this record is stored under in PackageWatchdog#mAllObservers |
| public final String mName; |
| //TODO(b/120598832): Add getter for mPackages |
| // Monitored packages of this observer, keyed by package name |
| @GuardedBy("mLock") |
| public final ArrayMap<String, MonitoredPackage> mPackages = new ArrayMap<>(); |
| // Observer instance attached via PackageWatchdog#registerHealthObserver, null until then |
| @Nullable |
| @GuardedBy("mLock") |
| public PackageHealthObserver mRegisteredObserver; |
| |
| /** Creates the record for observer {@code name}, initially monitoring {@code packages}. */ |
| ObserverInternal(String name, List<MonitoredPackage> packages) { |
| this.mName = name; |
| this.updatePackagesLocked(packages); |
| } |
| |
| /** |
| * Serializes this observer, i.e. its name and each of its {@link MonitoredPackage}s, as one |
| * observer element on {@code out}. Does not persist any package failure thresholds. |
| * |
| * @return {@code true} on success, {@code false} if an {@link IOException} occurred (the |
| * exception is logged, not propagated) |
| */ |
| @GuardedBy("mLock") |
| public boolean writeLocked(XmlSerializer out) { |
| try { |
| out.startTag(null, TAG_OBSERVER); |
| out.attribute(null, ATTR_NAME, mName); |
| for (MonitoredPackage monitored : mPackages.values()) { |
| monitored.writeLocked(out); |
| } |
| out.endTag(null, TAG_OBSERVER); |
| } catch (IOException e) { |
| Slog.w(TAG, "Cannot save observer", e); |
| return false; |
| } |
| return true; |
| } |
| |
| /** |
| * Adds {@code packages} to this observer, replacing any existing entry that has the same |
| * package name. |
| */ |
| @GuardedBy("mLock") |
| public void updatePackagesLocked(List<MonitoredPackage> packages) { |
| for (MonitoredPackage monitored : packages) { |
| mPackages.put(monitored.mName, monitored); |
| } |
| } |
| |
| /** |
| * Reduces the monitoring durations of all packages observed by this observer by |
| * {@code elapsedMs}. If any duration is less than 0, the package is removed from |
| * observation. If any health check duration is less than 0, the health check result |
| * is evaluated. |
| * |
| * @return a {@link Set} of packages that were removed from the observer without explicit |
| * health check passing, or an empty list if no package expired for which an explicit health |
| * check was still pending |
| */ |
| @GuardedBy("mLock") |
| private Set<MonitoredPackage> prunePackagesLocked(long elapsedMs) { |
| Set<MonitoredPackage> failedPackages = new ArraySet<>(); |
| Iterator<MonitoredPackage> it = mPackages.values().iterator(); |
| while (it.hasNext()) { |
| MonitoredPackage p = it.next(); |
| int oldState = p.getHealthCheckStateLocked(); |
| int newState = p.handleElapsedTimeLocked(elapsedMs); |
| if (oldState != MonitoredPackage.STATE_FAILED |
| && newState == MonitoredPackage.STATE_FAILED) { |
| Slog.i(TAG, "Package " + p.mName + " failed health check"); |
| failedPackages.add(p); |
| } |
| if (p.isExpiredLocked()) { |
| it.remove(); |
| } |
| } |
| return failedPackages; |
| } |
| |
| /** |
| * Increments failure counts of {@code packageName}. |
| * @returns {@code true} if failure threshold is exceeded, {@code false} otherwise |
| */ |
| @GuardedBy("mLock") |
| public boolean onPackageFailureLocked(String packageName) { |
| MonitoredPackage p = mPackages.get(packageName); |
| if (p != null) { |
| return p.onFailureLocked(); |
| } |
| return false; |
| } |
| |
| /** |
| * Returns one ObserverInternal from the {@code parser} and advances its state. |
| * |
| * <p>Note that this method is <b>not</b> thread safe. It should only be called from |
| * #loadFromFile which in turn is only called on construction of the |
| * singleton PackageWatchdog. |
| **/ |
| public static ObserverInternal read(XmlPullParser parser, PackageWatchdog watchdog) { |
| String observerName = null; |
| if (TAG_OBSERVER.equals(parser.getName())) { |
| observerName = parser.getAttributeValue(null, ATTR_NAME); |
| if (TextUtils.isEmpty(observerName)) { |
| Slog.wtf(TAG, "Unable to read observer name"); |
| return null; |
| } |
| } |
| List<MonitoredPackage> packages = new ArrayList<>(); |
| int innerDepth = parser.getDepth(); |
| try { |
| while (XmlUtils.nextElementWithin(parser, innerDepth)) { |
| if (TAG_PACKAGE.equals(parser.getName())) { |
| try { |
| String packageName = parser.getAttributeValue(null, ATTR_NAME); |
| long duration = Long.parseLong( |
| parser.getAttributeValue(null, ATTR_DURATION)); |
| long healthCheckDuration = Long.parseLong( |
| parser.getAttributeValue(null, |
| ATTR_EXPLICIT_HEALTH_CHECK_DURATION)); |
| boolean hasPassedHealthCheck = Boolean.parseBoolean( |
| parser.getAttributeValue(null, ATTR_PASSED_HEALTH_CHECK)); |
| if (!TextUtils.isEmpty(packageName)) { |
| packages.add(watchdog.new MonitoredPackage(packageName, duration, |
| healthCheckDuration, hasPassedHealthCheck)); |
| } |
| } catch (NumberFormatException e) { |
| Slog.wtf(TAG, "Skipping package for observer " + observerName, e); |
| continue; |
| } |
| } |
| } |
| } catch (XmlPullParserException | IOException e) { |
| Slog.wtf(TAG, "Unable to read observer " + observerName, e); |
| return null; |
| } |
| if (packages.isEmpty()) { |
| return null; |
| } |
| return new ObserverInternal(observerName, packages); |
| } |
| } |
| |
| /** |
| * Represents a package and its health check state along with the time |
| * it should be monitored for. |
| * |
| * <p> Note, the PackageWatchdog#mLock must always be held when reading or writing |
| * instances of this class. |
| */ |
| class MonitoredPackage { |
| // Health check states |
| // TODO(b/120598832): Prefix with HEALTH_CHECK |
| // mName has not passed health check but has requested a health check |
| public static final int STATE_ACTIVE = 0; |
| // mName has not passed health check and has not requested a health check |
| public static final int STATE_INACTIVE = 1; |
| // mName has passed health check |
| public static final int STATE_PASSED = 2; |
| // mName has failed health check |
| public static final int STATE_FAILED = 3; |
| |
| //TODO(b/120598832): VersionedPackage? |
| private final String mName; |
| // One of STATE_[ACTIVE|INACTIVE|PASSED|FAILED]. Updated on construction and after |
| // methods that could change the health check state: handleElapsedTimeLocked and |
| // tryPassHealthCheckLocked |
| private int mHealthCheckState = STATE_INACTIVE; |
| // Whether an explicit health check has passed. |
| // This value in addition with mHealthCheckDurationMs determines the health check state |
| // of the package, see #getHealthCheckStateLocked |
| @GuardedBy("mLock") |
| private boolean mHasPassedHealthCheck; |
| // System uptime duration to monitor package. |
| @GuardedBy("mLock") |
| private long mDurationMs; |
| // System uptime duration to check the result of an explicit health check |
| // Initially, MAX_VALUE until we get a value from the health check service |
| // and request health checks. |
| // This value in addition with mHasPassedHealthCheck determines the health check state |
| // of the package, see #getHealthCheckStateLocked |
| @GuardedBy("mLock") |
| private long mHealthCheckDurationMs = Long.MAX_VALUE; |
| // System uptime of first package failure |
| @GuardedBy("mLock") |
| private long mUptimeStartMs; |
| // Number of failures since mUptimeStartMs |
| @GuardedBy("mLock") |
| private int mFailures; |
| |
| MonitoredPackage(String name, long durationMs, boolean hasPassedHealthCheck) { |
| this(name, durationMs, Long.MAX_VALUE, hasPassedHealthCheck); |
| } |
| |
| MonitoredPackage(String name, long durationMs, long healthCheckDurationMs, |
| boolean hasPassedHealthCheck) { |
| mName = name; |
| mDurationMs = durationMs; |
| mHealthCheckDurationMs = healthCheckDurationMs; |
| mHasPassedHealthCheck = hasPassedHealthCheck; |
| updateHealthCheckStateLocked(); |
| } |
| |
| /** Writes the salient fields to disk using {@code out}. */ |
| @GuardedBy("mLock") |
| public void writeLocked(XmlSerializer out) throws IOException { |
| out.startTag(null, TAG_PACKAGE); |
| out.attribute(null, ATTR_NAME, mName); |
| out.attribute(null, ATTR_DURATION, String.valueOf(mDurationMs)); |
| out.attribute(null, ATTR_EXPLICIT_HEALTH_CHECK_DURATION, |
| String.valueOf(mHealthCheckDurationMs)); |
| out.attribute(null, ATTR_PASSED_HEALTH_CHECK, |
| String.valueOf(mHasPassedHealthCheck)); |
| out.endTag(null, TAG_PACKAGE); |
| } |
| |
| /** |
| * Increment package failures or resets failure count depending on the last package failure. |
| * |
| * @return {@code true} if failure count exceeds a threshold, {@code false} otherwise |
| */ |
| @GuardedBy("mLock") |
| public boolean onFailureLocked() { |
| final long now = SystemClock.uptimeMillis(); |
| final long duration = now - mUptimeStartMs; |
| if (duration > mTriggerFailureDurationMs) { |
| // TODO(b/120598832): Reseting to 1 is not correct |
| // because there may be more than 1 failure in the last trigger window from now |
| // This is the RescueParty impl, will leave for now |
| mFailures = 1; |
| mUptimeStartMs = now; |
| } else { |
| mFailures++; |
| } |
| boolean failed = mFailures >= mTriggerFailureCount; |
| if (failed) { |
| mFailures = 0; |
| } |
| return failed; |
| } |
| |
| /** |
| * Sets the initial health check duration. |
| * |
| * @return the new health check state |
| */ |
| @GuardedBy("mLock") |
| public int setHealthCheckActiveLocked(long initialHealthCheckDurationMs) { |
| if (initialHealthCheckDurationMs <= 0) { |
| Slog.wtf(TAG, "Cannot set non-positive health check duration " |
| + initialHealthCheckDurationMs + "ms for package " + mName |
| + ". Using total duration " + mDurationMs + "ms instead"); |
| initialHealthCheckDurationMs = mDurationMs; |
| } |
| if (mHealthCheckState == STATE_INACTIVE) { |
| // Transitions to ACTIVE |
| mHealthCheckDurationMs = initialHealthCheckDurationMs; |
| } |
| return updateHealthCheckStateLocked(); |
| } |
| |
| /** |
| * Updates the monitoring durations of the package. |
| * |
| * @return the new health check state |
| */ |
| @GuardedBy("mLock") |
| public int handleElapsedTimeLocked(long elapsedMs) { |
| if (elapsedMs <= 0) { |
| Slog.w(TAG, "Cannot handle non-positive elapsed time for package " + mName); |
| return mHealthCheckState; |
| } |
| // Transitions to FAILED if now <= 0 and health check not passed |
| mDurationMs -= elapsedMs; |
| if (mHealthCheckState == STATE_ACTIVE) { |
| // We only update health check durations if we have #setHealthCheckActiveLocked |
| // This ensures we don't leave the INACTIVE state for an unexpected elapsed time |
| // Transitions to FAILED if now <= 0 and health check not passed |
| mHealthCheckDurationMs -= elapsedMs; |
| } |
| return updateHealthCheckStateLocked(); |
| } |
| |
| /** |
| * Marks the health check as passed and transitions to {@link #STATE_PASSED} |
| * if not yet {@link #STATE_FAILED}. |
| * |
| * @return the new health check state |
| */ |
| @GuardedBy("mLock") |
| public int tryPassHealthCheckLocked() { |
| if (mHealthCheckState != STATE_FAILED) { |
| // FAILED is a final state so only pass if we haven't failed |
| // Transition to PASSED |
| mHasPassedHealthCheck = true; |
| } |
| return updateHealthCheckStateLocked(); |
| } |
| |
| /** Returns the monitored package name. */ |
| private String getName() { |
| return mName; |
| } |
| |
| //TODO(b/120598832): IntDef |
| /** |
| * Returns the current health check state, any of {@link #STATE_ACTIVE}, |
| * {@link #STATE_INACTIVE} or {@link #STATE_PASSED} |
| */ |
| @GuardedBy("mLock") |
| public int getHealthCheckStateLocked() { |
| return mHealthCheckState; |
| } |
| |
| /** |
| * Returns the shortest duration before the package should be scheduled for a prune. |
| * |
| * @return the duration or {@link Long#MAX_VALUE} if the package should not be scheduled |
| */ |
| @GuardedBy("mLock") |
| public long getShortestScheduleDurationMsLocked() { |
| // Consider health check duration only if #isPendingHealthChecksLocked is true |
| return Math.min(toPositive(mDurationMs), |
| isPendingHealthChecksLocked() |
| ? toPositive(mHealthCheckDurationMs) : Long.MAX_VALUE); |
| } |
| |
| /** |
| * Returns {@code true} if the total duration left to monitor the package is less than or |
| * equal to 0 {@code false} otherwise. |
| */ |
| @GuardedBy("mLock") |
| public boolean isExpiredLocked() { |
| return mDurationMs <= 0; |
| } |
| |
| /** |
| * Returns {@code true} if the package, {@link #getName} is expecting health check results |
| * {@code false} otherwise. |
| */ |
| @GuardedBy("mLock") |
| public boolean isPendingHealthChecksLocked() { |
| return mHealthCheckState == STATE_ACTIVE || mHealthCheckState == STATE_INACTIVE; |
| } |
| |
| /** |
| * Updates the health check state based on {@link #mHasPassedHealthCheck} |
| * and {@link #mHealthCheckDurationMs}. |
| * |
| * @return the new health check state |
| */ |
| @GuardedBy("mLock") |
| private int updateHealthCheckStateLocked() { |
| int oldState = mHealthCheckState; |
| if (mHasPassedHealthCheck) { |
| // Set final state first to avoid ambiguity |
| mHealthCheckState = STATE_PASSED; |
| } else if (mHealthCheckDurationMs <= 0 || mDurationMs <= 0) { |
| // Set final state first to avoid ambiguity |
| mHealthCheckState = STATE_FAILED; |
| } else if (mHealthCheckDurationMs == Long.MAX_VALUE) { |
| mHealthCheckState = STATE_INACTIVE; |
| } else { |
| mHealthCheckState = STATE_ACTIVE; |
| } |
| Slog.i(TAG, "Updated health check state for package " + mName + ": " |
| + toString(oldState) + " -> " + toString(mHealthCheckState)); |
| return mHealthCheckState; |
| } |
| |
| /** Returns a {@link String} representation of the current health check state. */ |
| private String toString(int state) { |
| switch (state) { |
| case STATE_ACTIVE: |
| return "ACTIVE"; |
| case STATE_INACTIVE: |
| return "INACTIVE"; |
| case STATE_PASSED: |
| return "PASSED"; |
| case STATE_FAILED: |
| return "FAILED"; |
| default: |
| return "UNKNOWN"; |
| } |
| } |
| |
| /** Returns {@code value} if it is greater than 0 or {@link Long#MAX_VALUE} otherwise. */ |
| private long toPositive(long value) { |
| return value > 0 ? value : Long.MAX_VALUE; |
| } |
| } |
| } |