Commit dbf89321 authored by Linus Torvalds
Browse files

Merge tag 'sched_ext-for-6.19-rc1-fixes' of...

Merge tag 'sched_ext-for-6.19-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:

 - Fix memory leak when destroying helper kthread workers during
   scheduler disable

 - Fix bypass depth accounting on scx_enable() failure which could leave
   the system permanently in bypass mode

 - Fix missing preemption handling when moving tasks to local DSQs via
   scx_bpf_dsq_move()

 - Misc fixes including NULL check for put_prev_task(), flushing stdout
   in selftests, and removing unused code

* tag 'sched_ext-for-6.19-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Remove unused code in the do_pick_task_scx()
  selftests/sched_ext: flush stdout before test to avoid log spam
  sched_ext: Fix missing post-enqueue handling in move_local_task_to_local_dsq()
  sched_ext: Factor out local_dsq_post_enq() from dispatch_enqueue()
  sched_ext: Fix bypass depth leak on scx_enable() failure
  sched/ext: Avoid null ptr traversal when ->put_prev_task() is called with NULL next
  sched_ext: Fix the memleak for sch->helper objects
parents 6b63f90f bb27226f
Loading
Loading
Loading
Loading
+48 −24
Original line number Diff line number Diff line
@@ -41,6 +41,13 @@ static bool scx_init_task_enabled;
static bool scx_switching_all;
DEFINE_STATIC_KEY_FALSE(__scx_switched_all);

/*
 * Tracks whether scx_enable() called scx_bypass(true). Used to balance bypass
 * depth on enable failure. Will be removed when bypass depth is moved into the
 * sched instance.
 *
 * Set right after scx_enable() calls scx_bypass(true) and cleared right before
 * its matching scx_bypass(false) on the success path. If it is still set when
 * scx_disable_workfn() runs, the disable path clears it and issues the
 * scx_bypass(false) that scx_enable() never reached.
 */
static bool scx_bypassed_for_enable;

static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);

@@ -975,6 +982,30 @@ static void refill_task_slice_dfl(struct scx_sched *sch, struct task_struct *p)
	__scx_add_event(sch, SCX_EV_REFILL_SLICE_DFL, 1);
}

/*
 * Post-enqueue handling for a task that has just been placed on a local DSQ.
 *
 * @dsq: the local DSQ @p was added to; must be the scx.local_dsq embedded in
 *	 a struct rq, as the owning rq is recovered with container_of()
 * @p: the task that was enqueued
 * @enq_flags: SCX_ENQ_* flags of the enqueue; only SCX_ENQ_PREEMPT is
 *	       examined here
 *
 * Decides whether the task currently running on the owning rq needs to be
 * rescheduled as a result of @p showing up.
 */
static void local_dsq_post_enq(struct scx_dispatch_q *dsq, struct task_struct *p,
			       u64 enq_flags)
{
	struct rq *rq = container_of(dsq, struct rq, scx.local_dsq);

	/*
	 * A balancing @rq means the CPU is already vacant and searching for
	 * something to run, so moving @p into its local DSQ requires neither
	 * preemption nor a resched.
	 */
	if (rq->scx.flags & SCX_RQ_IN_BALANCE)
		return;

	/*
	 * Explicit preemption request against a running SCX task other than
	 * @p itself: zero the running task's slice and reschedule.
	 */
	if ((enq_flags & SCX_ENQ_PREEMPT) && p != rq->curr &&
	    rq->curr->sched_class == &ext_sched_class) {
		rq->curr->scx.slice = 0;
		resched_curr(rq);
		return;
	}

	/*
	 * No preemption was performed. Reschedule only when ext_sched_class
	 * ranks above the class of the task that is currently running.
	 */
	if (sched_class_above(&ext_sched_class, rq->curr->sched_class))
		resched_curr(rq);
}

static void dispatch_enqueue(struct scx_sched *sch, struct scx_dispatch_q *dsq,
			     struct task_struct *p, u64 enq_flags)
{
@@ -1086,23 +1117,11 @@ static void dispatch_enqueue(struct scx_sched *sch, struct scx_dispatch_q *dsq,
	if (enq_flags & SCX_ENQ_CLEAR_OPSS)
		atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_NONE);

	if (is_local) {
		struct rq *rq = container_of(dsq, struct rq, scx.local_dsq);
		bool preempt = false;

		if ((enq_flags & SCX_ENQ_PREEMPT) && p != rq->curr &&
		    rq->curr->sched_class == &ext_sched_class) {
			rq->curr->scx.slice = 0;
			preempt = true;
		}

		if (preempt || sched_class_above(&ext_sched_class,
						 rq->curr->sched_class))
			resched_curr(rq);
	} else {
	if (is_local)
		local_dsq_post_enq(dsq, p, enq_flags);
	else
		raw_spin_unlock(&dsq->lock);
}
}

static void task_unlink_from_dsq(struct task_struct *p,
				 struct scx_dispatch_q *dsq)
@@ -1625,6 +1644,8 @@ static void move_local_task_to_local_dsq(struct task_struct *p, u64 enq_flags,

	dsq_mod_nr(dst_dsq, 1);
	p->scx.dsq = dst_dsq;

	local_dsq_post_enq(dst_dsq, p, enq_flags);
}

/**
@@ -2402,7 +2423,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
		 * ops.enqueue() that @p is the only one available for this cpu,
		 * which should trigger an explicit follow-up scheduling event.
		 */
		if (sched_class_above(&ext_sched_class, next->sched_class)) {
		if (next && sched_class_above(&ext_sched_class, next->sched_class)) {
			WARN_ON_ONCE(!(sch->ops.flags & SCX_OPS_ENQ_LAST));
			do_enqueue_task(rq, p, SCX_ENQ_LAST, -1);
		} else {
@@ -2425,7 +2446,7 @@ static struct task_struct *
do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
{
	struct task_struct *prev = rq->curr;
	bool keep_prev, kick_idle = false;
	bool keep_prev;
	struct task_struct *p;

	/* see kick_cpus_irq_workfn() */
@@ -2467,12 +2488,8 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
			refill_task_slice_dfl(rcu_dereference_sched(scx_root), p);
	} else {
		p = first_local_task(rq);
		if (!p) {
			if (kick_idle)
				scx_kick_cpu(rcu_dereference_sched(scx_root),
					     cpu_of(rq), SCX_KICK_IDLE);
		if (!p)
			return NULL;
		}

		if (unlikely(!p->scx.slice)) {
			struct scx_sched *sch = rcu_dereference_sched(scx_root);
@@ -3575,7 +3592,7 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
	int node;

	irq_work_sync(&sch->error_irq_work);
	kthread_stop(sch->helper->task);
	kthread_destroy_worker(sch->helper);

	free_percpu(sch->pcpu);

@@ -4318,6 +4335,11 @@ static void scx_disable_workfn(struct kthread_work *work)
	scx_dsp_max_batch = 0;
	free_kick_syncs();

	if (scx_bypassed_for_enable) {
		scx_bypassed_for_enable = false;
		scx_bypass(false);
	}

	mutex_unlock(&scx_enable_mutex);

	WARN_ON_ONCE(scx_set_enable_state(SCX_DISABLED) != SCX_DISABLING);
@@ -4786,7 +4808,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
	return sch;

err_stop_helper:
	kthread_stop(sch->helper->task);
	kthread_destroy_worker(sch->helper);
err_free_pcpu:
	free_percpu(sch->pcpu);
err_free_gdsqs:
@@ -4970,6 +4992,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
	 * Init in bypass mode to guarantee forward progress.
	 */
	scx_bypass(true);
	scx_bypassed_for_enable = true;

	for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++)
		if (((void (**)(void))ops)[i])
@@ -5067,6 +5090,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
	scx_task_iter_stop(&sti);
	percpu_up_write(&scx_fork_rwsem);

	scx_bypassed_for_enable = false;
	scx_bypass(false);

	if (!scx_tryset_enable_state(SCX_ENABLED, SCX_ENABLING)) {
+8 −0
Original line number Diff line number Diff line
@@ -46,6 +46,14 @@ static void print_test_preamble(const struct scx_test *test, bool quiet)
	if (!quiet)
		printf("DESCRIPTION: %s\n", test->description);
	printf("OUTPUT:\n");

	/*
	 * The tests may fork with the preamble buffered
	 * in the children's stdout. Flush before the test
	 * to avoid printing the message multiple times.
	 */
	fflush(stdout);
	fflush(stderr);
}

static const char *status_to_result(enum scx_test_status status)