Commit a5fa0708 authored by Tejun Heo
Browse files

sched_ext: Enforce scheduling authority in dispatch and select_cpu operations



Add checks to enforce scheduling authority boundaries when multiple
schedulers are present:

1. In scx_dsq_insert_preamble() and the dispatch retry path, ignore attempts
   to insert tasks that the scheduler doesn't own, counting them via
   SCX_EV_INSERT_NOT_OWNED. As BPF schedulers are allowed to ignore
   dequeues, such attempts can occur legitimately during sub-scheduler
   enabling when tasks move between schedulers. The counter helps distinguish
   normal cases from scheduler bugs.

2. For scx_bpf_dsq_insert_vtime() and scx_bpf_select_cpu_and(), error out
   when sub-schedulers are attached. These functions lack the aux__prog
   parameter needed to identify the calling scheduler, so they cannot be used
   safely with multiple schedulers. BPF programs should use the arg-wrapped
   versions (__scx_bpf_dsq_insert_vtime() and __scx_bpf_select_cpu_and())
   instead.

These checks ensure that with multiple concurrent schedulers, scheduler
identity can be properly determined and unauthorized task operations are
prevented or tracked.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
parent 105dcd00
Loading
Loading
Loading
Loading
+26 −0
Original line number Diff line number Diff line
@@ -2325,6 +2325,12 @@ static void finish_dispatch(struct scx_sched *sch, struct rq *rq,
		if ((opss & SCX_OPSS_QSEQ_MASK) != qseq_at_dispatch)
			return;

		/* see SCX_EV_INSERT_NOT_OWNED definition */
		if (unlikely(!scx_task_on_sched(sch, p))) {
			__scx_add_event(sch, SCX_EV_INSERT_NOT_OWNED, 1);
			return;
		}

		/*
		 * While we know @p is accessible, we don't yet have a claim on
		 * it - the BPF scheduler is allowed to dispatch tasks
@@ -4028,6 +4034,7 @@ static ssize_t scx_attr_events_show(struct kobject *kobj,
	at += scx_attr_event_show(buf, at, &events, SCX_EV_BYPASS_DURATION);
	at += scx_attr_event_show(buf, at, &events, SCX_EV_BYPASS_DISPATCH);
	at += scx_attr_event_show(buf, at, &events, SCX_EV_BYPASS_ACTIVATE);
	at += scx_attr_event_show(buf, at, &events, SCX_EV_INSERT_NOT_OWNED);
	return at;
}
SCX_ATTR(events);
@@ -5150,6 +5157,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
	scx_dump_event(s, &events, SCX_EV_BYPASS_DURATION);
	scx_dump_event(s, &events, SCX_EV_BYPASS_DISPATCH);
	scx_dump_event(s, &events, SCX_EV_BYPASS_ACTIVATE);
	scx_dump_event(s, &events, SCX_EV_INSERT_NOT_OWNED);

	if (seq_buf_has_overflowed(&s) && dump_len >= sizeof(trunc_marker))
		memcpy(ei->dump + dump_len - sizeof(trunc_marker),
@@ -6476,6 +6484,12 @@ static bool scx_dsq_insert_preamble(struct scx_sched *sch, struct task_struct *p
		return false;
	}

	/* see SCX_EV_INSERT_NOT_OWNED definition */
	if (unlikely(!scx_task_on_sched(sch, p))) {
		__scx_add_event(sch, SCX_EV_INSERT_NOT_OWNED, 1);
		return false;
	}

	return true;
}

@@ -6668,6 +6682,17 @@ __bpf_kfunc void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id,
	if (unlikely(!sch))
		return;

#ifdef CONFIG_EXT_SUB_SCHED
	/*
	 * Disallow if any sub-scheds are attached. There is no way to tell
	 * which scheduler called us, just error out @p's scheduler.
	 */
	if (unlikely(!list_empty(&sch->children))) {
		scx_error(scx_task_sched(p), "__scx_bpf_dsq_insert_vtime() must be used");
		return;
	}
#endif

	scx_dsq_insert_vtime(sch, p, dsq_id, slice, vtime, enq_flags);
}

@@ -8000,6 +8025,7 @@ static void scx_read_events(struct scx_sched *sch, struct scx_event_stats *event
		scx_agg_event(events, e_cpu, SCX_EV_BYPASS_DURATION);
		scx_agg_event(events, e_cpu, SCX_EV_BYPASS_DISPATCH);
		scx_agg_event(events, e_cpu, SCX_EV_BYPASS_ACTIVATE);
		scx_agg_event(events, e_cpu, SCX_EV_INSERT_NOT_OWNED);
	}
}

+11 −0
Original line number Diff line number Diff line
@@ -1060,6 +1060,17 @@ __bpf_kfunc s32 scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64
	if (unlikely(!sch))
		return -ENODEV;

#ifdef CONFIG_EXT_SUB_SCHED
	/*
	 * Disallow if any sub-scheds are attached. There is no way to tell
	 * which scheduler called us, just error out @p's scheduler.
	 */
	if (unlikely(!list_empty(&sch->children))) {
		scx_error(scx_task_sched(p), "__scx_bpf_select_cpu_and() must be used");
		return -EINVAL;
	}
#endif

	return select_cpu_from_kfunc(sch, p, prev_cpu, wake_flags,
				     cpus_allowed, flags);
}
+12 −0
Original line number Diff line number Diff line
@@ -911,6 +911,18 @@ struct scx_event_stats {
	 * The number of times the bypassing mode has been activated.
	 */
	s64		SCX_EV_BYPASS_ACTIVATE;

	/*
	 * The number of times the scheduler attempted to insert a task that it
	 * doesn't own into a DSQ. Such attempts are ignored.
	 *
	 * As BPF schedulers are allowed to ignore dequeues, it's difficult to
	 * tell whether such an attempt is from a scheduler malfunction or an
	 * ignored dequeue around sub-sched enabling. If this count keeps going
	 * up regardless of sub-sched enabling, it likely indicates a bug in the
	 * scheduler.
	 */
	s64		SCX_EV_INSERT_NOT_OWNED;
};

struct scx_sched_pcpu {