Commit 60f21a26 authored by Tejun Heo
Browse files

cgroup, sched_ext: Include exiting tasks in cgroup iter



a72f73c4 ("cgroup: Don't expose dead tasks in cgroup") made
css_task_iter_advance() skip exiting tasks so cgroup.procs stays consistent
with waitpid() visibility. Unfortunately, this broke scx_task_iter.

scx_task_iter walks either scx_tasks (global) or a cgroup subtree via
css_task_iter() and the two modes are expected to cover the same set of
tasks. After the above change the cgroup-scoped mode silently skips tasks
past exit_signals() that are still on scx_tasks.

scx_sub_enable_workfn()'s abort path is one of the symptoms: an exiting
SCX_TASK_SUB_INIT task can race past the cgroup iter, leaking
__scx_init_task() state. Other iterations share the same gap.

Add CSS_TASK_ITER_WITH_DEAD to opt out of the skip and use it from
scx_task_iter().

Fixes: b0e4c2f8 ("sched_ext: Implement cgroup subtree iteration for scx_task_iter")
Reported-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
parent d99f7a32
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@ struct kernel_clone_args;
/* Behaviour flags for a css task iteration; tested against it->flags. */
enum css_task_iter_flags {
	/* only thread group leaders are walked */
	CSS_TASK_ITER_PROCS		= (1U << 0),

	/* every threaded css_set in the domain is walked */
	CSS_TASK_ITER_THREADED		= (1U << 1),

	/* exiting tasks are included in the walk as well */
	CSS_TASK_ITER_WITH_DEAD		= (1U << 2),

	/* flags for internal use */
	CSS_TASK_ITER_SKIPPED		= (1U << 16),
};

+5 −3
Original line number Diff line number Diff line
@@ -5059,10 +5059,12 @@ static void css_task_iter_advance(struct css_task_iter *it)

	task = list_entry(it->task_pos, struct task_struct, cg_list);
	/*
	 * Hide tasks that are exiting but not yet removed. Keep zombie
	 * leaders with live threads visible.
	 * Hide tasks that are exiting but not yet removed by default. Keep
	 * zombie leaders with live threads visible. Usages that need to walk
	 * every existing task can opt out via CSS_TASK_ITER_WITH_DEAD.
	 */
	if ((task->flags & PF_EXITING) && !atomic_read(&task->signal->live))
	if (!(it->flags & CSS_TASK_ITER_WITH_DEAD) &&
	    (task->flags & PF_EXITING) && !atomic_read(&task->signal->live))
		goto repeat;

	if (it->flags & CSS_TASK_ITER_PROCS) {
+4 −2
Original line number Diff line number Diff line
@@ -766,7 +766,8 @@ static void scx_task_iter_start(struct scx_task_iter *iter, struct cgroup *cgrp)
		lockdep_assert_held(&cgroup_mutex);
		iter->cgrp = cgrp;
		iter->css_pos = css_next_descendant_pre(NULL, &iter->cgrp->self);
		css_task_iter_start(iter->css_pos, 0, &iter->css_iter);
		css_task_iter_start(iter->css_pos, CSS_TASK_ITER_WITH_DEAD,
				    &iter->css_iter);
		return;
	}
#endif
@@ -866,7 +867,8 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
			iter->css_pos = css_next_descendant_pre(iter->css_pos,
								&iter->cgrp->self);
			if (iter->css_pos)
				css_task_iter_start(iter->css_pos, 0, &iter->css_iter);
				css_task_iter_start(iter->css_pos, CSS_TASK_ITER_WITH_DEAD,
						    &iter->css_iter);
		}
		return NULL;
	}