Commit 2e3f3090 authored by Linus Torvalds
Browse files

Merge tag 'sched_ext-for-6.13-rc6-fixes' of...

Merge tag 'sched_ext-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:

 - Fix corner case bug where ops.dispatch() couldn't extend the
   execution of the current task if SCX_OPS_ENQ_LAST is set.

 - Fix ops.cpu_release() not being called when a SCX task is preempted
   by a higher priority sched class task.

 - Fix builtin idle mask being incorrectly left as busy after an idle CPU
   is picked and kicked.

 - scx_ops_bypass() was unnecessarily using rq_lock() which comes with
   rq pinning related sanity checks which could trigger spuriously.
   Switch to raw_spin_rq_lock().

* tag 'sched_ext-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: idle: Refresh idle masks during idle-to-idle transitions
  sched_ext: switch class when preempted by higher priority scheduler
  sched_ext: Replace rq_lock() to raw_spin_rq_lock() in scx_ops_bypass()
  sched_ext: keep running prev when prev->scx.slice != 0
parents 58624e4b a2a3374c
Loading
Loading
Loading
Loading
+67 −20
Original line number Diff line number Diff line
@@ -2747,6 +2747,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
{
	struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
	bool prev_on_scx = prev->sched_class == &ext_sched_class;
	bool prev_on_rq = prev->scx.flags & SCX_TASK_QUEUED;
	int nr_loops = SCX_DSP_MAX_LOOPS;

	lockdep_assert_rq_held(rq);
@@ -2779,8 +2780,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
		 * See scx_ops_disable_workfn() for the explanation on the
		 * bypassing test.
		 */
		if ((prev->scx.flags & SCX_TASK_QUEUED) &&
		    prev->scx.slice && !scx_rq_bypassing(rq)) {
		if (prev_on_rq && prev->scx.slice && !scx_rq_bypassing(rq)) {
			rq->scx.flags |= SCX_RQ_BAL_KEEP;
			goto has_tasks;
		}
@@ -2813,6 +2813,10 @@ static int balance_one(struct rq *rq, struct task_struct *prev)

		flush_dispatch_buf(rq);

		if (prev_on_rq && prev->scx.slice) {
			rq->scx.flags |= SCX_RQ_BAL_KEEP;
			goto has_tasks;
		}
		if (rq->scx.local_dsq.nr)
			goto has_tasks;
		if (consume_global_dsq(rq))
@@ -2838,8 +2842,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
	 * Didn't find another task to run. Keep running @prev unless
	 * %SCX_OPS_ENQ_LAST is in effect.
	 */
	if ((prev->scx.flags & SCX_TASK_QUEUED) &&
	    (!static_branch_unlikely(&scx_ops_enq_last) ||
	if (prev_on_rq && (!static_branch_unlikely(&scx_ops_enq_last) ||
	     scx_rq_bypassing(rq))) {
		rq->scx.flags |= SCX_RQ_BAL_KEEP;
		goto has_tasks;
@@ -3034,7 +3037,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
		 */
		if (p->scx.slice && !scx_rq_bypassing(rq)) {
			dispatch_enqueue(&rq->scx.local_dsq, p, SCX_ENQ_HEAD);
			return;
			goto switch_class;
		}

		/*
@@ -3051,6 +3054,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
		}
	}

switch_class:
	if (next && next->sched_class != &ext_sched_class)
		switch_class(rq, next);
}
@@ -3586,16 +3590,8 @@ static void reset_idle_masks(void)
	cpumask_copy(idle_masks.smt, cpu_online_mask);
}

void __scx_update_idle(struct rq *rq, bool idle)
static void update_builtin_idle(int cpu, bool idle)
{
	int cpu = cpu_of(rq);

	if (SCX_HAS_OP(update_idle) && !scx_rq_bypassing(rq)) {
		SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
		if (!static_branch_unlikely(&scx_builtin_idle_enabled))
			return;
	}

	if (idle)
		cpumask_set_cpu(cpu, idle_masks.cpu);
	else
@@ -3622,6 +3618,57 @@ void __scx_update_idle(struct rq *rq, bool idle)
#endif
}

/*
 * Update the idle state of a CPU to @idle.
 *
 * If @do_notify is true, ops.update_idle() is invoked to notify the scx
 * scheduler of an actual idle state transition (idle to busy or vice
 * versa). If @do_notify is false, only the idle state in the idle masks is
 * refreshed without invoking ops.update_idle().
 *
 * This distinction is necessary, because an idle CPU can be "reserved" and
 * awakened via scx_bpf_pick_idle_cpu() + scx_bpf_kick_cpu(), marking it as
 * busy even if no tasks are dispatched. In this case, the CPU may return
 * to idle without a true state transition. Refreshing the idle masks
 * without invoking ops.update_idle() ensures accurate idle state tracking
 * while avoiding unnecessary updates and maintaining balanced state
 * transitions.
 */
void __scx_update_idle(struct rq *rq, bool idle, bool do_notify)
{
	int cpu = cpu_of(rq);

	lockdep_assert_rq_held(rq);

	/*
	 * ops.update_idle() is only for genuine task <-> idle thread
	 * transitions. Those are flagged by @do_notify, which
	 * put_prev_task_idle() and set_next_task_idle() pass as true.
	 * Nothing is reported while the rq is being bypassed.
	 */
	if (SCX_HAS_OP(update_idle) && do_notify && !scx_rq_bypassing(rq))
		SCX_CALL_OP(SCX_KF_REST, update_idle, cpu, idle);

	/* The rest only concerns the built-in idle masks. */
	if (!static_branch_likely(&scx_builtin_idle_enabled))
		return;

	/*
	 * The masks are refreshed in two situations:
	 * - a real idle transition (@do_notify is true), or
	 * - an idle-to-idle transition, recognized by the current task
	 *   being the idle thread (the pick_task_idle() path).
	 *
	 * In any other case bail out: when a regular task hands over to
	 * the idle thread, set_next_task_idle() reaches this function
	 * with @do_notify set and takes care of the masks, so touching
	 * them here as well would just duplicate that update.
	 */
	if (!do_notify && !is_idle_task(rq->curr))
		return;

	update_builtin_idle(cpu, idle);
}

static void handle_hotplug(struct rq *rq, bool online)
{
	int cpu = cpu_of(rq);
@@ -4744,10 +4791,9 @@ static void scx_ops_bypass(bool bypass)
	 */
	for_each_possible_cpu(cpu) {
		struct rq *rq = cpu_rq(cpu);
		struct rq_flags rf;
		struct task_struct *p, *n;

		rq_lock(rq, &rf);
		raw_spin_rq_lock(rq);

		if (bypass) {
			WARN_ON_ONCE(rq->scx.flags & SCX_RQ_BYPASSING);
@@ -4763,7 +4809,7 @@ static void scx_ops_bypass(bool bypass)
		 * sees scx_rq_bypassing() before moving tasks to SCX.
		 */
		if (!scx_enabled()) {
			rq_unlock(rq, &rf);
			raw_spin_rq_unlock(rq);
			continue;
		}

@@ -4783,10 +4829,11 @@ static void scx_ops_bypass(bool bypass)
			sched_enq_and_set_task(&ctx);
		}

		rq_unlock(rq, &rf);

		/* resched to restore ticks and idle state */
		resched_cpu(cpu);
		if (cpu_online(cpu) || cpu == smp_processor_id())
			resched_curr(rq);

		raw_spin_rq_unlock(rq);
	}

	atomic_dec(&scx_ops_breather_depth);
+4 −4
Original line number Diff line number Diff line
@@ -57,15 +57,15 @@ static inline void init_sched_ext_class(void) {}
#endif	/* CONFIG_SCHED_CLASS_EXT */

#if defined(CONFIG_SCHED_CLASS_EXT) && defined(CONFIG_SMP)
void __scx_update_idle(struct rq *rq, bool idle);
void __scx_update_idle(struct rq *rq, bool idle, bool do_notify);

static inline void scx_update_idle(struct rq *rq, bool idle)
static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify)
{
	if (scx_enabled())
		__scx_update_idle(rq, idle);
		__scx_update_idle(rq, idle, do_notify);
}
#else
static inline void scx_update_idle(struct rq *rq, bool idle) {}
static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify) {}
#endif

#ifdef CONFIG_CGROUP_SCHED
+3 −2
Original line number Diff line number Diff line
@@ -452,19 +452,20 @@ static void wakeup_preempt_idle(struct rq *rq, struct task_struct *p, int flags)
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, struct task_struct *next)
{
	dl_server_update_idle_time(rq, prev);
	scx_update_idle(rq, false);
	scx_update_idle(rq, false, true);
}

static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
{
	update_idle_core(rq);
	scx_update_idle(rq, true);
	scx_update_idle(rq, true, true);
	schedstat_inc(rq->sched_goidle);
	next->se.exec_start = rq_clock_task(rq);
}

/*
 * Pick the idle task of @rq.
 *
 * Returns rq->idle unconditionally. Before returning, the sched_ext idle
 * state of the CPU is refreshed via scx_update_idle().
 */
struct task_struct *pick_task_idle(struct rq *rq)
{
	/*
	 * idle == true, do_notify == false: ask for the idle masks to be
	 * refreshed as idle without reporting a transition through
	 * ops.update_idle(). __scx_update_idle() only acts on such a request
	 * when rq->curr is already the idle thread (idle-to-idle case).
	 */
	scx_update_idle(rq, true, false);
	return rq->idle;
}