Commit 56cabb93 authored by Douglas Anderson, committed by Rafael J. Wysocki
Browse files

PM: sleep: Allow configuring the DPM watchdog to warn earlier than panic



Allow configuring the DPM watchdog to warn about slow suspend/resume
functions without causing a system panic(). This allows you to set the
DPM_WATCHDOG_WARNING_TIMEOUT to something like 5 or 10 seconds to get
warnings about slow suspend/resume functions that eventually succeed.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Tomasz Figa <tfiga@chromium.org>
Link: https://patch.msgid.link/20250109125957.v2.1.I4554f931b8da97948f308ecc651b124338ee9603@changeid


[ rjw: Subject edit ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
parent 96484d21
Loading
Loading
Loading
Loading
+19 −5
Original line number Diff line number Diff line
@@ -496,6 +496,7 @@ struct dpm_watchdog {
	struct device		*dev;
	struct task_struct	*tsk;
	struct timer_list	timer;
	bool			fatal;
};

#define DECLARE_DPM_WATCHDOG_ON_STACK(wd) \
@@ -512,13 +513,25 @@ struct dpm_watchdog {
/**
 * dpm_watchdog_handler - Timer callback for the DPM watchdog.
 * @t: The expired timer, embedded in a struct dpm_watchdog.
 *
 * On a non-fatal expiry, print a warning together with the stack of the
 * task recorded in the watchdog, mark the watchdog as fatal and re-arm the
 * timer for the seconds remaining between the warning timeout and the
 * panic timeout.  On a fatal expiry, print an emergency-level message,
 * dump the stack of the recorded task and panic.
 */
static void dpm_watchdog_handler(struct timer_list *t)
{
	struct dpm_watchdog *wd = from_timer(wd, t, timer);
	unsigned int secs_until_panic;

	if (!wd->fatal) {
		secs_until_panic = CONFIG_DPM_WATCHDOG_TIMEOUT -
				   CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT;

		dev_warn(wd->dev,
			 "**** DPM device timeout after %u seconds; %u seconds until panic ****\n",
			 CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT, secs_until_panic);
		show_stack(wd->tsk, NULL, KERN_WARNING);

		/* The next expiry of this timer takes the fatal path below. */
		wd->fatal = true;
		mod_timer(&wd->timer, jiffies + HZ * secs_until_panic);
		return;
	}

	dev_emerg(wd->dev, "**** DPM device timeout ****\n");
	show_stack(wd->tsk, NULL, KERN_EMERG);
	panic("%s %s: unrecoverable failure\n",
	      dev_driver_string(wd->dev), dev_name(wd->dev));
}

/**
 * dpm_watchdog_set - Enable pm watchdog for given device.
 * @wd: Watchdog. Must be allocated on the stack.
@@ -530,10 +543,11 @@ static void dpm_watchdog_set(struct dpm_watchdog *wd, struct device *dev)

	wd->dev = dev;
	wd->tsk = current;
	wd->fatal = CONFIG_DPM_WATCHDOG_TIMEOUT == CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT;

	timer_setup_on_stack(timer, dpm_watchdog_handler, 0);
	/* use same timeout value for both suspend and resume */
	timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_TIMEOUT;
	timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT;
	add_timer(timer);
}

+20 −1
Original line number Diff line number Diff line
@@ -257,11 +257,30 @@ config DPM_WATCHDOG
	  boot session.

config DPM_WATCHDOG_TIMEOUT
	int "Watchdog timeout in seconds"
	int "Watchdog timeout to panic in seconds"
	range 1 120
	default 120
	depends on DPM_WATCHDOG

# The range is capped at DPM_WATCHDOG_TIMEOUT so the warning can never be
# scheduled later than the panic: the watchdog handler computes the time
# remaining until panic as DPM_WATCHDOG_TIMEOUT minus this value.
config DPM_WATCHDOG_WARNING_TIMEOUT
	int "Watchdog timeout to warn in seconds"
	range 1 DPM_WATCHDOG_TIMEOUT
	default DPM_WATCHDOG_TIMEOUT
	depends on DPM_WATCHDOG
	help
	  If the DPM watchdog warning timeout and main timeout are
	  different then a non-fatal warning (with a stack trace of
	  the stuck suspend routine) will be printed when the warning
	  timeout expires. If the suspend routine gets un-stuck
	  before the main timeout expires then no other action is
	  taken. If the routine continues to be stuck and the main
	  timeout expires then an emergency-level message and stack
	  trace will be printed and the system will panic.

	  If the warning timeout is equal to the main timeout (the
	  default) then the warning will never happen and the system
	  will jump straight to panic when the main timeout expires.

config PM_TRACE
	bool
	help