sched_ext: Decouple kfunc unlocked-context check from kf_mask (0022b328) · Commits · git / linux-net

kernel/sched/ext.c

+3 −1

Original line number	Diff line number	Diff line
		@@ -3308,10 +3308,12 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
		WARN_ON_ONCE(*ddsp_taskp);
		*ddsp_taskp = p;

		this_rq()->scx.in_select_cpu = true;
		cpu = SCX_CALL_OP_TASK_RET(sch,
		SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU,
		select_cpu, NULL, p, prev_cpu,
		wake_flags);
		this_rq()->scx.in_select_cpu = false;
		p->scx.selected_cpu = cpu;
		*ddsp_taskp = NULL;
		if (ops_cpu_valid(sch, cpu, "from ops.select_cpu()"))
		@@ -8144,7 +8146,7 @@ static bool scx_dsq_move(struct bpf_iter_scx_dsq_kern *kit,
		bool in_balance;
		unsigned long flags;

		if (!scx_kf_allowed_if_unlocked() &&
		if ((scx_locked_rq() || this_rq()->scx.in_select_cpu) &&
		!scx_kf_allowed(sch, SCX_KF_DISPATCH))
		return false;

kernel/sched/ext_idle.c

+17 −22

Original line number	Diff line number	Diff line
		@@ -913,8 +913,8 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
		s32 prev_cpu, u64 wake_flags,
		const struct cpumask *allowed, u64 flags)
		{
		struct rq *rq;
		struct rq_flags rf;
		unsigned long irq_flags;
		bool we_locked = false;
		s32 cpu;

		if (!ops_cpu_valid(sch, prev_cpu, NULL))
		@@ -924,28 +924,23 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
		return -EBUSY;

		/*
		* If called from an unlocked context, acquire the task's rq lock,
		* so that we can safely access p->cpus_ptr and p->nr_cpus_allowed.
		* Accessing p->cpus_ptr / p->nr_cpus_allowed needs either @p's rq
		* lock or @p's pi_lock. Three cases:
		*
		* Otherwise, allow to use this kfunc only from ops.select_cpu()
		* and ops.select_enqueue().
		* - inside ops.select_cpu(): try_to_wake_up() holds @p's pi_lock.
		* - other rq-locked SCX op: scx_locked_rq() points at the held rq.
		* - truly unlocked (UNLOCKED ops, SYSCALL, non-SCX struct_ops):
		* nothing held, take pi_lock ourselves.
		*/
		if (scx_kf_allowed_if_unlocked()) {
		rq = task_rq_lock(p, &rf);
		} else {
		if (!scx_kf_allowed(sch, SCX_KF_SELECT_CPU | SCX_KF_ENQUEUE))
		if (this_rq()->scx.in_select_cpu) {
		lockdep_assert_held(&p->pi_lock);
		} else if (!scx_locked_rq()) {
		raw_spin_lock_irqsave(&p->pi_lock, irq_flags);
		we_locked = true;
		} else if (!scx_kf_allowed(sch, SCX_KF_ENQUEUE)) {
		return -EPERM;
		rq = scx_locked_rq();
		}

		/*
		* Validate locking correctness to access p->cpus_ptr and
		* p->nr_cpus_allowed: if we're holding an rq lock, we're safe;
		* otherwise, assert that p->pi_lock is held.
		*/
		if (!rq)
		lockdep_assert_held(&p->pi_lock);

		/*
		* This may also be called from ops.enqueue(), so we need to handle
		* per-CPU tasks as well. For these tasks, we can skip all idle CPU
		@@ -963,8 +958,8 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
		allowed ?: p->cpus_ptr, flags);
		}

		if (scx_kf_allowed_if_unlocked())
		task_rq_unlock(rq, p, &rf);
		if (we_locked)
		raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags);

		return cpu;
		}

kernel/sched/ext_internal.h

+0 −5

Original line number	Diff line number	Diff line
		@@ -1372,11 +1372,6 @@ static inline struct rq *scx_locked_rq(void)
		return __this_cpu_read(scx_locked_rq_state);
		}

		static inline bool scx_kf_allowed_if_unlocked(void)
		{
		return !current->scx.kf_mask;
		}

		static inline bool scx_bypassing(struct scx_sched *sch, s32 cpu)
		{
		return unlikely(per_cpu_ptr(sch->pcpu, cpu)->flags &

kernel/sched/sched.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -798,6 +798,7 @@ struct scx_rq {
		u64 extra_enq_flags; /* see move_task_to_local_dsq() */
		u32 nr_running;
		u32 cpuperf_target; /* [0, SCHED_CAPACITY_SCALE] */
		bool in_select_cpu;
		bool cpu_released;
		u32 flags;
		u32 nr_immed; /* ENQ_IMMED tasks on local_dsq */