Commit 9276b7cc authored by Tejun Heo's avatar Tejun Heo
Browse files

sched_ext: Support dumping multiple schedulers and add scheduler identification



Extend scx_dump_state() to support multiple schedulers and improve task
identification in dumps. The function now takes a specific scheduler to
dump and can optionally filter tasks by scheduler.

scx_dump_task() now displays which scheduler each task belongs to, using
"*" to mark tasks owned by the scheduler being dumped. Sub-schedulers
are identified with their level and cgroup ID.

The SysRq-D handler now iterates through all active schedulers under
scx_sched_lock and dumps each one separately. For SysRq-D dumps, only
tasks owned by each scheduler are dumped to avoid redundancy since all
schedulers are being dumped. Error-triggered dumps continue to dump all
tasks since only that specific scheduler is being dumped.

Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Reviewed-by: default avatarAndrea Righi <arighi@nvidia.com>
parent eff782fd
Loading
Loading
Loading
Loading
+43 −11
Original line number Diff line number Diff line
@@ -5175,22 +5175,34 @@ static void ops_dump_exit(void)
	scx_dump_data.cpu = -1;
}

static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx,
static void scx_dump_task(struct scx_sched *sch,
			  struct seq_buf *s, struct scx_dump_ctx *dctx,
			  struct task_struct *p, char marker)
{
	static unsigned long bt[SCX_EXIT_BT_LEN];
	struct scx_sched *sch = scx_root;
	struct scx_sched *task_sch = scx_task_sched(p);
	const char *own_marker;
	char sch_id_buf[32];
	char dsq_id_buf[19] = "(n/a)";
	unsigned long ops_state = atomic_long_read(&p->scx.ops_state);
	unsigned int bt_len = 0;

	own_marker = task_sch == sch ? "*" : "";

	if (task_sch->level == 0)
		scnprintf(sch_id_buf, sizeof(sch_id_buf), "root");
	else
		scnprintf(sch_id_buf, sizeof(sch_id_buf), "sub%d-%llu",
			  task_sch->level, task_sch->ops.sub_cgroup_id);

	if (p->scx.dsq)
		scnprintf(dsq_id_buf, sizeof(dsq_id_buf), "0x%llx",
			  (unsigned long long)p->scx.dsq->id);

	dump_newline(s);
	dump_line(s, " %c%c %s[%d] %+ldms",
	dump_line(s, " %c%c %s[%d] %s%s %+ldms",
		  marker, task_state_to_char(p), p->comm, p->pid,
		  own_marker, sch_id_buf,
		  jiffies_delta_msecs(p->scx.runnable_at, dctx->at_jiffies));
	dump_line(s, "      scx_state/flags=%u/0x%x dsq_flags=0x%x ops_state/qseq=%lu/%lu",
		  scx_get_task_state(p), p->scx.flags & ~SCX_TASK_STATE_MASK,
@@ -5218,11 +5230,18 @@ static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx,
	}
}

static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
/*
 * Dump scheduler state. If @dump_all_tasks is true, dump all tasks regardless
 * of which scheduler they belong to. If false, only dump tasks owned by @sch.
 * For SysRq-D dumps, @dump_all_tasks=false since all schedulers are dumped
 * separately. For error dumps, @dump_all_tasks=true since only the failing
 * scheduler is dumped.
 */
static void scx_dump_state(struct scx_sched *sch, struct scx_exit_info *ei,
			   size_t dump_len, bool dump_all_tasks)
{
	static DEFINE_RAW_SPINLOCK(dump_lock);
	static const char trunc_marker[] = "\n\n~~~~ TRUNCATED ~~~~\n";
	struct scx_sched *sch = scx_root;
	struct scx_dump_ctx dctx = {
		.kind = ei->kind,
		.exit_code = ei->exit_code,
@@ -5239,6 +5258,14 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)

	seq_buf_init(&s, ei->dump, dump_len);

#ifdef CONFIG_EXT_SUB_SCHED
	if (sch->level == 0)
		dump_line(&s, "%s: root", sch->ops.name);
	else
		dump_line(&s, "%s: sub%d-%llu %s",
			  sch->ops.name, sch->level, sch->ops.sub_cgroup_id,
			  sch->cgrp_path);
#endif
	if (ei->kind == SCX_EXIT_NONE) {
		dump_line(&s, "Debug dump triggered by %s", ei->reason);
	} else {
@@ -5331,11 +5358,13 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
				seq_buf_set_overflow(&s);
		}

		if (rq->curr->sched_class == &ext_sched_class)
			scx_dump_task(&s, &dctx, rq->curr, '*');
		if (rq->curr->sched_class == &ext_sched_class &&
		    (dump_all_tasks || scx_task_on_sched(sch, rq->curr)))
			scx_dump_task(sch, &s, &dctx, rq->curr, '*');

		list_for_each_entry(p, &rq->scx.runnable_list, scx.runnable_node)
			scx_dump_task(&s, &dctx, p, ' ');
			if (dump_all_tasks || scx_task_on_sched(sch, p))
				scx_dump_task(sch, &s, &dctx, p, ' ');
	next:
		rq_unlock_irqrestore(rq, &rf);
	}
@@ -5368,7 +5397,7 @@ static void scx_error_irq_workfn(struct irq_work *irq_work)
	struct scx_exit_info *ei = sch->exit_info;

	if (ei->kind >= SCX_EXIT_ERROR)
		scx_dump_state(ei, sch->ops.exit_dump_len);
		scx_dump_state(sch, ei, sch->ops.exit_dump_len, true);

	kthread_queue_work(sch->helper, &sch->disable_work);
}
@@ -6400,9 +6429,12 @@ static const struct sysrq_key_op sysrq_sched_ext_reset_op = {
static void sysrq_handle_sched_ext_dump(u8 key)
{
	struct scx_exit_info ei = { .kind = SCX_EXIT_NONE, .reason = "SysRq-D" };
	struct scx_sched *sch;

	if (scx_enabled())
		scx_dump_state(&ei, 0);
	guard(raw_spinlock_irqsave)(&scx_sched_lock);

	list_for_each_entry_rcu(sch, &scx_sched_all, all)
		scx_dump_state(sch, &ei, 0, false);
}

static const struct sysrq_key_op sysrq_sched_ext_dump_op = {