Commit 75b607fa authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'sched_ext-for-6.12-rc2-fixes' of...

Merge tag 'sched_ext-for-6.12-rc2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:

 - ops.enqueue() didn't have a way to tell whether select_task_rq_scx()
   and thus ops.select_cpu() were skipped. Some schedulers were incorrectly
   using SCX_ENQ_WAKEUP. Add SCX_ENQ_CPU_SELECTED and fix scx_qmap using
   it.

 - Remove a spurious WARN_ON_ONCE() in scx_cgroup_exit()

 - Fix error information clobbering during load

 - Add missing __weak markers to BPF helper declarations

 - Doc update

* tag 'sched_ext-for-6.12-rc2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Documentation: Update instructions for running example schedulers
  sched_ext, scx_qmap: Add and use SCX_ENQ_CPU_SELECTED
  sched/core: Add ENQUEUE_RQ_SELECTED to indicate whether ->select_task_rq() was called
  sched/core: Make select_task_rq() take the pointer to wake_flags instead of value
  sched_ext: scx_cgroup_exit() may be called without successful scx_cgroup_init()
  sched_ext: Improve error reporting during loading
  sched_ext: Add __weak markers to BPF helper function declarations
parents 5b7c893e e0ed5215
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -66,7 +66,7 @@ BPF scheduler and reverts all tasks back to CFS.
.. code-block:: none

    # make -j16 -C tools/sched_ext
    # tools/sched_ext/scx_simple
    # tools/sched_ext/build/bin/scx_simple
    local=0 global=3
    local=5 global=24
    local=9 global=44
+14 −7
Original line number Diff line number Diff line
@@ -3518,14 +3518,16 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
 */
static inline
int select_task_rq(struct task_struct *p, int cpu, int wake_flags)
int select_task_rq(struct task_struct *p, int cpu, int *wake_flags)
{
	lockdep_assert_held(&p->pi_lock);

	if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
		cpu = p->sched_class->select_task_rq(p, cpu, wake_flags);
	else
	if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p)) {
		cpu = p->sched_class->select_task_rq(p, cpu, *wake_flags);
		*wake_flags |= WF_RQ_SELECTED;
	} else {
		cpu = cpumask_any(p->cpus_ptr);
	}

	/*
	 * In order not to call set_task_cpu() on a blocking task we need
@@ -3659,6 +3661,8 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
		rq->nr_uninterruptible--;

#ifdef CONFIG_SMP
	if (wake_flags & WF_RQ_SELECTED)
		en_flags |= ENQUEUE_RQ_SELECTED;
	if (wake_flags & WF_MIGRATED)
		en_flags |= ENQUEUE_MIGRATED;
	else
@@ -4120,6 +4124,8 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
	guard(preempt)();
	int cpu, success = 0;

	wake_flags |= WF_TTWU;

	if (p == current) {
		/*
		 * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
@@ -4252,7 +4258,7 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
		 */
		smp_cond_load_acquire(&p->on_cpu, !VAL);

		cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
		cpu = select_task_rq(p, p->wake_cpu, &wake_flags);
		if (task_cpu(p) != cpu) {
			if (p->in_iowait) {
				delayacct_blkio_end(p);
@@ -4793,6 +4799,7 @@ void wake_up_new_task(struct task_struct *p)
{
	struct rq_flags rf;
	struct rq *rq;
	int wake_flags = WF_FORK;

	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
	WRITE_ONCE(p->__state, TASK_RUNNING);
@@ -4807,7 +4814,7 @@ void wake_up_new_task(struct task_struct *p)
	 */
	p->recent_used_cpu = task_cpu(p);
	rseq_migrate(p);
	__set_task_cpu(p, select_task_rq(p, task_cpu(p), WF_FORK));
	__set_task_cpu(p, select_task_rq(p, task_cpu(p), &wake_flags));
#endif
	rq = __task_rq_lock(p, &rf);
	update_rq_clock(rq);
@@ -4815,7 +4822,7 @@ void wake_up_new_task(struct task_struct *p)

	activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL);
	trace_sched_wakeup_new(p);
	wakeup_preempt(rq, p, WF_FORK);
	wakeup_preempt(rq, p, wake_flags);
#ifdef CONFIG_SMP
	if (p->sched_class->task_woken) {
		/*
+20 −12
Original line number Diff line number Diff line
@@ -625,6 +625,10 @@ struct sched_ext_ops {
	/**
	 * exit - Clean up after the BPF scheduler
	 * @info: Exit info
	 *
	 * ops.exit() is also called on ops.init() failure, which is a bit
	 * unusual. This is to allow rich reporting through @info on how
	 * ops.init() failed.
	 */
	void (*exit)(struct scx_exit_info *info);

@@ -692,6 +696,7 @@ enum scx_enq_flags {
	/* expose select ENQUEUE_* flags as enums */
	SCX_ENQ_WAKEUP		= ENQUEUE_WAKEUP,
	SCX_ENQ_HEAD		= ENQUEUE_HEAD,
	SCX_ENQ_CPU_SELECTED	= ENQUEUE_RQ_SELECTED,

	/* high 32bits are SCX specific */

@@ -4048,7 +4053,6 @@ static void scx_cgroup_exit(void)

	percpu_rwsem_assert_held(&scx_cgroup_rwsem);

	WARN_ON_ONCE(!scx_cgroup_enabled);
	scx_cgroup_enabled = false;

	/*
@@ -4117,6 +4121,7 @@ static int scx_cgroup_init(void)
				      css->cgroup, &args);
		if (ret) {
			css_put(css);
			scx_ops_error("ops.cgroup_init() failed (%d)", ret);
			return ret;
		}
		tg->scx_flags |= SCX_TG_INITED;
@@ -5041,6 +5046,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
		if (ret) {
			ret = ops_sanitize_err("init", ret);
			cpus_read_unlock();
			scx_ops_error("ops.init() failed (%d)", ret);
			goto err_disable;
		}
	}
@@ -5150,7 +5156,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
			spin_lock_irq(&scx_tasks_lock);
			scx_task_iter_exit(&sti);
			spin_unlock_irq(&scx_tasks_lock);
			pr_err("sched_ext: ops.init_task() failed (%d) for %s[%d] while loading\n",
			scx_ops_error("ops.init_task() failed (%d) for %s[%d]",
				      ret, p->comm, p->pid);
			goto err_disable_unlock_all;
		}
@@ -5199,14 +5205,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)

	scx_ops_bypass(false);

	/*
	 * Returning an error code here would lose the recorded error
	 * information. Exit indicating success so that the error is notified
	 * through ops.exit() with all the details.
	 */
	if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) {
		WARN_ON_ONCE(atomic_read(&scx_exit_kind) == SCX_EXIT_NONE);
		ret = 0;
		goto err_disable;
	}

@@ -5241,10 +5241,18 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
	scx_ops_bypass(false);
err_disable:
	mutex_unlock(&scx_ops_enable_mutex);
	/* must be fully disabled before returning */
	scx_ops_disable(SCX_EXIT_ERROR);
	/*
	 * Returning an error code here would not pass all the error information
	 * to userspace. Record errno using scx_ops_error() for cases where
	 * scx_ops_error() wasn't already invoked and exit indicating success so
	 * that the error is notified through ops.exit() with all the details.
	 *
	 * Flush scx_ops_disable_work to ensure that error is reported before
	 * init completion.
	 */
	scx_ops_error("scx_ops_enable() failed (%d)", ret);
	kthread_flush_work(&scx_ops_disable_work);
	return ret;
	return 0;
}


+3 −0
Original line number Diff line number Diff line
@@ -2292,6 +2292,7 @@ static inline int task_on_rq_migrating(struct task_struct *p)
#define WF_SYNC			0x10 /* Waker goes to sleep after wakeup */
#define WF_MIGRATED		0x20 /* Internal use, task got migrated */
#define WF_CURRENT_CPU		0x40 /* Prefer to move the wakee to the current CPU. */
#define WF_RQ_SELECTED		0x80 /* ->select_task_rq() was called */

#ifdef CONFIG_SMP
static_assert(WF_EXEC == SD_BALANCE_EXEC);
@@ -2334,6 +2335,7 @@ extern const u32 sched_prio_to_wmult[40];
 * ENQUEUE_HEAD      - place at front of runqueue (tail if not specified)
 * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
 * ENQUEUE_MIGRATED  - the task was migrated during wakeup
 * ENQUEUE_RQ_SELECTED - ->select_task_rq() was called
 *
 */

@@ -2360,6 +2362,7 @@ extern const u32 sched_prio_to_wmult[40];
#define ENQUEUE_INITIAL		0x80
#define ENQUEUE_MIGRATING	0x100
#define ENQUEUE_DELAYED		0x200
#define ENQUEUE_RQ_SELECTED	0x400

#define RETRY_TASK		((void *)-1UL)

+3 −3
Original line number Diff line number Diff line
@@ -41,8 +41,8 @@ void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vt
u32 scx_bpf_dispatch_nr_slots(void) __ksym;
void scx_bpf_dispatch_cancel(void) __ksym;
bool scx_bpf_consume(u64 dsq_id) __ksym;
void scx_bpf_dispatch_from_dsq_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym;
void scx_bpf_dispatch_from_dsq_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym;
void scx_bpf_dispatch_from_dsq_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
void scx_bpf_dispatch_from_dsq_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
bool scx_bpf_dispatch_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
bool scx_bpf_dispatch_vtime_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
u32 scx_bpf_reenqueue_local(void) __ksym;
@@ -71,7 +71,7 @@ s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
bool scx_bpf_task_running(const struct task_struct *p) __ksym;
s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym;
struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;

/*
 * Use the following as @it__iter when calling
Loading