Commit 4ba54a6c authored by Tejun Heo
Browse files

sched_ext: Refactor lockup handlers into handle_lockup()



scx_rcu_cpu_stall() and scx_softlockup() share the same pattern: check if the
scheduler is enabled under RCU read lock and trigger an error if so. Extract
the common pattern into handle_lockup() helper. Add scx_verror() macro and use
guard(rcu)().

This simplifies both handlers, reduces code duplication, and prepares for
hardlockup handling.

Reviewed-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
Cc: Emil Tsalapatis <etsal@meta.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
parent f2fe382e
Loading
Loading
Loading
Loading
+25 −40
Original line number Diff line number Diff line
@@ -192,6 +192,7 @@ static __printf(4, 5) bool scx_exit(struct scx_sched *sch,
}

#define scx_error(sch, fmt, args...)	scx_exit((sch), SCX_EXIT_ERROR, 0, fmt, ##args)
#define scx_verror(sch, fmt, args)	scx_vexit((sch), SCX_EXIT_ERROR, 0, fmt, args)

#define SCX_HAS_OP(sch, op)	test_bit(SCX_OP_IDX(op), (sch)->has_op)

@@ -3654,39 +3655,40 @@ bool scx_allow_ttwu_queue(const struct task_struct *p)
	return false;
}

/**
 * scx_rcu_cpu_stall - sched_ext RCU CPU stall handler
 *
 * While there are various reasons why RCU CPU stalls can occur on a system
 * that may not be caused by the current BPF scheduler, try kicking out the
 * current scheduler in an attempt to recover the system to a good state before
 * issuing panics.
 */
bool scx_rcu_cpu_stall(void)
static __printf(1, 2) bool handle_lockup(const char *fmt, ...)
{
	struct scx_sched *sch;
	va_list args;

	rcu_read_lock();
	guard(rcu)();

	sch = rcu_dereference(scx_root);
	if (unlikely(!sch)) {
		rcu_read_unlock();
	if (unlikely(!sch))
		return false;
	}

	switch (scx_enable_state()) {
	case SCX_ENABLING:
	case SCX_ENABLED:
		break;
		va_start(args, fmt);
		scx_verror(sch, fmt, args);
		va_end(args);
		return true;
	default:
		rcu_read_unlock();
		return false;
	}
}

	scx_error(sch, "RCU CPU stall detected!");
	rcu_read_unlock();

	return true;
/**
 * scx_rcu_cpu_stall - sched_ext RCU CPU stall handler
 *
 * While there are various reasons why RCU CPU stalls can occur on a system
 * that may not be caused by the current BPF scheduler, try kicking out the
 * current scheduler in an attempt to recover the system to a good state before
 * issuing panics.
 */
bool scx_rcu_cpu_stall(void)
{
	return handle_lockup("RCU CPU stall detected!");
}

/**
@@ -3701,28 +3703,11 @@ bool scx_rcu_cpu_stall(void)
 */
void scx_softlockup(u32 dur_s)
{
	struct scx_sched *sch;

	rcu_read_lock();

	sch = rcu_dereference(scx_root);
	if (unlikely(!sch))
		goto out_unlock;

	switch (scx_enable_state()) {
	case SCX_ENABLING:
	case SCX_ENABLED:
		break;
	default:
		goto out_unlock;
	}

	printk_deferred(KERN_ERR "sched_ext: Soft lockup - CPU%d stuck for %us, disabling \"%s\"\n",
			smp_processor_id(), dur_s, scx_root->ops.name);
	if (!handle_lockup("soft lockup - CPU %d stuck for %us", smp_processor_id(), dur_s))
		return;

	scx_error(sch, "soft lockup - CPU#%d stuck for %us", smp_processor_id(), dur_s);
out_unlock:
	rcu_read_unlock();
	printk_deferred(KERN_ERR "sched_ext: Soft lockup - CPU %d stuck for %us, disabling BPF scheduler\n",
			smp_processor_id(), dur_s);
}

/**