Commit 2e0199df authored by Peter Zijlstra
Browse files

sched/fair: Prepare exit/cleanup paths for delayed_dequeue



When dequeue_task() is delayed it becomes possible to exit a task (or
cgroup) that is still enqueued. Ensure things are dequeued before
freeing.

Thanks to Valentin for asking the obvious questions and making
switched_from_fair() less weird.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Tested-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lkml.kernel.org/r/20240727105029.631948434@infradead.org
parent e28b5f8b
Loading
Loading
Loading
Loading
+46 −13
Original line number Diff line number Diff line
@@ -8342,7 +8342,21 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)

/*
 * Cleanup for an exiting fair task @p.
 *
 * With delayed dequeue a task can exit while its entity is still
 * enqueued (se->sched_delayed set).  Complete that dequeue first, then
 * drop the entity's load contribution exactly once.
 */
static void task_dead_fair(struct task_struct *p)
{
	struct sched_entity *se = &p->se;

	/* Unlocked peek; the flag is re-checked under the rq lock below. */
	if (se->sched_delayed) {
		struct rq_flags rf;
		struct rq *rq;

		rq = task_rq_lock(p, &rf);
		/* The flag may have been cleared before we got the lock. */
		if (se->sched_delayed) {
			update_rq_clock(rq);
			dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
		}
		task_rq_unlock(rq, p, &rf);
	}

	remove_entity_load_avg(se);
}

/*
@@ -12854,10 +12868,22 @@ static void attach_task_cfs_rq(struct task_struct *p)
static void switched_from_fair(struct rq *rq, struct task_struct *p)
{
	detach_task_cfs_rq(p);
	/*
	 * Since this is called after changing class, this is a little weird
	 * and we cannot use DEQUEUE_DELAYED.
	 */
	if (p->se.sched_delayed) {
		dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP);
		p->se.sched_delayed = 0;
		if (sched_feat(DELAY_ZERO) && p->se.vlag > 0)
			p->se.vlag = 0;
	}
}

static void switched_to_fair(struct rq *rq, struct task_struct *p)
{
	SCHED_WARN_ON(p->se.sched_delayed);

	attach_task_cfs_rq(p);

	set_task_max_allowed_capacity(p);
@@ -13008,28 +13034,35 @@ void online_fair_sched_group(struct task_group *tg)

/*
 * Tear down the fair-scheduling state of task group @tg on every
 * possible CPU.
 *
 * With delayed dequeue a group entity may still be enqueued
 * (se->sched_delayed set) when the group is destroyed, so it is
 * dequeued before its load contribution is removed.  Afterwards the
 * group's cfs_rq is taken off the leaf list if it is still on it.
 */
void unregister_fair_sched_group(struct task_group *tg)
{
	int cpu;

	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));

	for_each_possible_cpu(cpu) {
		struct cfs_rq *cfs_rq = tg->cfs_rq[cpu];
		struct sched_entity *se = tg->se[cpu];
		struct rq *rq = cpu_rq(cpu);

		if (se) {
			/* Unlocked peek; re-checked once the rq lock is held. */
			if (se->sched_delayed) {
				/*
				 * Scoped lock: presumably held until the end of
				 * this block, so it also covers the leaf-list
				 * removal below (see guard() in linux/cleanup.h).
				 */
				guard(rq_lock_irqsave)(rq);
				if (se->sched_delayed) {
					update_rq_clock(rq);
					dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
				}
				list_del_leaf_cfs_rq(cfs_rq);
			}
			remove_entity_load_avg(se);
		}

		/*
		 * Only empty task groups can be destroyed; so we can speculatively
		 * check on_list without danger of it being re-added.
		 */
		if (cfs_rq->on_list) {
			guard(rq_lock_irqsave)(rq);
			list_del_leaf_cfs_rq(cfs_rq);
		}
	}
}