From 2a05039b444d6d679d03d5247b7ae54e73206c1a Mon Sep 17 00:00:00 2001 From: Zimuzo Date: Thu, 9 May 2019 12:51:33 +0100 Subject: [PATCH] Allow pausing Watchdog on the current thread Since we start the watchdog early during boot, we may have some false positives where the watchdog thinks a thread is blocked but it is just running a long task. This cl enables us pause the watchdog triggering for the current thread and resume the triggering after the long running task. An alternative would be pausing for a specified duration without an explicit resume but that may be difficult to find an upper bound for tasks across all devices. For now the primary long running task is dexopting which happens on the main thread during boot. We pause the watchdog triggering on the main thread and resume afterwards. Test: Verified with logs that pausing the Watchdog pauses triggering and resuming resumes triggering Bug: 132076426 Change-Id: I3876c41e6d0e41d044a5b1d5e57f894c7fb4fb0e --- .../java/com/android/server/Watchdog.java | 79 +++++++++++++++++-- .../java/com/android/server/SystemServer.java | 3 + 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/services/core/java/com/android/server/Watchdog.java b/services/core/java/com/android/server/Watchdog.java index f9aaf11c75f05..a7fb99f8b004e 100644 --- a/services/core/java/com/android/server/Watchdog.java +++ b/services/core/java/com/android/server/Watchdog.java @@ -140,6 +140,7 @@ public class Watchdog extends Thread { private boolean mCompleted; private Monitor mCurrentMonitor; private long mStartTime; + private int mPauseCount; HandlerChecker(Handler handler, String name, long waitMaxMillis) { mHandler = handler; @@ -160,17 +161,18 @@ public class Watchdog extends Thread { mMonitors.addAll(mMonitorQueue); mMonitorQueue.clear(); } - if (mMonitors.size() == 0 && mHandler.getLooper().getQueue().isPolling()) { + if ((mMonitors.size() == 0 && mHandler.getLooper().getQueue().isPolling()) + || (mPauseCount > 0)) { + // Don't schedule until after resume OR // If the target looper has recently been polling, then // there is no reason to enqueue our checker on it since that // is as good as it not being deadlocked. This avoid having - // to do a context switch to check the thread. Note that we - // only do this if mCheckReboot is false and we have no - // monitors, since those would need to be executed at this point. + // to do a context switch to check the thread. Note that we + // only do this if we have no monitors since those would need to + // be executed at this point. mCompleted = true; return; } - if (!mCompleted) { // we already have a check in flight, so no need return; @@ -236,6 +238,28 @@ public class Watchdog extends Thread { mCurrentMonitor = null; } } + + /** Pause the HandlerChecker. */ + public void pauseLocked(String reason) { + mPauseCount++; + // Mark as completed, because there's a chance we called this after the watchog + // thread loop called Object#wait after 'WAITED_HALF'. In that case we want to ensure + // the next call to #getCompletionStateLocked for this checker returns 'COMPLETED' + mCompleted = true; + Slog.i(TAG, "Pausing HandlerChecker: " + mName + " for reason: " + + reason + ". Pause count: " + mPauseCount); + } + + /** Resume the HandlerChecker from the last {@link #pauseLocked}. */ + public void resumeLocked(String reason) { + if (mPauseCount > 0) { + mPauseCount--; + Slog.i(TAG, "Resuming HandlerChecker: " + mName + " for reason: " + + reason + ". Pause count: " + mPauseCount); + } else { + Slog.wtf(TAG, "Already resumed HandlerChecker: " + mName); + } + } } final class RebootRequestReceiver extends BroadcastReceiver { @@ -363,6 +387,51 @@ public class Watchdog extends Thread { } } + /** + * Pauses Watchdog action for the currently running thread. Useful before executing long running + * operations that could falsely trigger the watchdog. Each call to this will require a matching + * call to {@link #resumeWatchingCurrentThread}. + * + *

If the current thread has not been added to the Watchdog, this call is a no-op. + * + *

If the Watchdog is already paused for the current thread, this call adds + * adds another pause and will require an additional {@link #resumeCurrentThread} to resume. + * + *

Note: Use with care, as any deadlocks on the current thread will be undetected until all + * pauses have been resumed. + */ + public void pauseWatchingCurrentThread(String reason) { + synchronized (this) { + for (HandlerChecker hc : mHandlerCheckers) { + if (Thread.currentThread().equals(hc.getThread())) { + hc.pauseLocked(reason); + } + } + } + } + + /** + * Resumes the last pause from {@link #pauseWatchingCurrentThread} for the currently running + * thread. + * + *

If the current thread has not been added to the Watchdog, this call is a no-op. + * + *

If the Watchdog action for the current thread is already resumed, this call logs a wtf. + * + *

If all pauses have been resumed, the Watchdog action is finally resumed, otherwise, + * the Watchdog action for the current thread remains paused until resume is called at least + * as many times as the calls to pause. + */ + public void resumeWatchingCurrentThread(String reason) { + synchronized (this) { + for (HandlerChecker hc : mHandlerCheckers) { + if (Thread.currentThread().equals(hc.getThread())) { + hc.resumeLocked(reason); + } + } + } + } + /** * Perform a full reboot of the system. */ diff --git a/services/java/com/android/server/SystemServer.java b/services/java/com/android/server/SystemServer.java index 4ac8342e6e607..8ec2960a2da89 100644 --- a/services/java/com/android/server/SystemServer.java +++ b/services/java/com/android/server/SystemServer.java @@ -1174,9 +1174,12 @@ public final class SystemServer { if (!mOnlyCore) { traceBeginAndSlog("UpdatePackagesIfNeeded"); try { + Watchdog.getInstance().pauseWatchingCurrentThread("dexopt"); mPackageManagerService.updatePackagesIfNeeded(); } catch (Throwable e) { reportWtf("update packages", e); + } finally { + Watchdog.getInstance().resumeWatchingCurrentThread("dexopt"); } traceEnd(); }