Commit 1dcb98bb authored by Tejun Heo
Browse files

sched_ext: Pass locked CPU parameter to scx_hardlockup() and add docs



With the buddy lockup detector, smp_processor_id() returns the detecting CPU,
not the locked CPU, making scx_hardlockup()'s printouts confusing. Pass the
locked CPU number from watchdog_hardlockup_check() as a parameter instead.

Also add kerneldoc comments to handle_lockup(), scx_hardlockup(), and
scx_rcu_cpu_stall() documenting their return value semantics.

Suggested-by: Doug Anderson <dianders@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Acked-by: Andrea Righi <arighi@nvidia.com>
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
parent 67932f69
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -230,7 +230,7 @@ struct sched_ext_entity {
void sched_ext_dead(struct task_struct *p);
void print_scx_info(const char *log_lvl, struct task_struct *p);
void scx_softlockup(u32 dur_s);
bool scx_hardlockup(void);
bool scx_hardlockup(int cpu);
bool scx_rcu_cpu_stall(void);

#else	/* !CONFIG_SCHED_CLASS_EXT */
@@ -238,7 +238,7 @@ bool scx_rcu_cpu_stall(void);
static inline void sched_ext_dead(struct task_struct *p) {}
static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
static inline void scx_softlockup(u32 dur_s) {}
static inline bool scx_hardlockup(void) { return false; }
static inline bool scx_hardlockup(int cpu) { return false; }
static inline bool scx_rcu_cpu_stall(void) { return false; }

#endif	/* CONFIG_SCHED_CLASS_EXT */
+22 −3
Original line number Diff line number Diff line
@@ -3687,6 +3687,17 @@ bool scx_allow_ttwu_queue(const struct task_struct *p)
	return false;
}

/**
 * handle_lockup - sched_ext common lockup handler
 * @fmt: format string
 *
 * Called on system stall or lockup condition and initiates abort of sched_ext
 * if enabled, which may resolve the reported lockup.
 *
 * Returns %true if sched_ext is enabled and abort was initiated, which may
 * resolve the lockup. %false if sched_ext is not enabled or abort was already
 * initiated by someone else.
 */
static __printf(1, 2) bool handle_lockup(const char *fmt, ...)
{
	struct scx_sched *sch;
@@ -3718,6 +3729,10 @@ static __printf(1, 2) bool handle_lockup(const char *fmt, ...)
 * that may not be caused by the current BPF scheduler, try kicking out the
 * current scheduler in an attempt to recover the system to a good state before
 * issuing panics.
 *
 * Returns %true if sched_ext is enabled and abort was initiated, which may
 * resolve the reported RCU stall. %false if sched_ext is not enabled or someone
 * else already initiated abort.
 */
bool scx_rcu_cpu_stall(void)
{
@@ -3750,14 +3765,18 @@ void scx_softlockup(u32 dur_s)
 * numerous affinitized tasks in a single queue and directing all CPUs at it.
 * Try kicking out the current scheduler in an attempt to recover the system to
 * a good state before taking more drastic actions.
 *
 * Returns %true if sched_ext is enabled and abort was initiated, which may
 * resolve the reported hardlockup. %false if sched_ext is not enabled or
 * someone else already initiated abort.
 */
bool scx_hardlockup(void)
bool scx_hardlockup(int cpu)
{
	if (!handle_lockup("hard lockup - CPU %d", smp_processor_id()))
	if (!handle_lockup("hard lockup - CPU %d", cpu))
		return false;

	printk_deferred(KERN_ERR "sched_ext: Hard lockup - CPU %d, disabling BPF scheduler\n",
			smp_processor_id());
			cpu);
	return true;
}

+1 −1
Original line number Diff line number Diff line
@@ -203,7 +203,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
		 * only once when sched_ext is enabled and will immediately
		 * abort the BPF scheduler and print out a warning message.
		 */
		if (scx_hardlockup())
		if (scx_hardlockup(cpu))
			return;

		/* Only print hardlockups once. */