Commit 7a0aabd9 authored by Waiman Long's avatar Waiman Long Committed by Tejun Heo
Browse files

cgroup/cpuset: Always use cpu_active_mask



The current cpuset code uses both cpu_active_mask and cpu_online_mask
and it can be confusing which one should be used if we need to update
the code.

The top_cpuset is always synchronized to cpu_active_mask and we should
avoid using cpu_online_mask as much as possible. An active CPU is always
an online CPU, but not vice versa. cpu_active_mask and cpu_online_mask
can differ during hotplug operations.

A CPU is marked active at the last stage of CPU bringup (CPUHP_AP_ACTIVE).
It is also the stage where cpuset hotplug code will be called to update
the sched domains so that the scheduler can move a normal task to a
newly active CPU or remove tasks away from a newly inactivated CPU. The
online bit is set much earlier in the CPU bringup process and cleared
much later in CPU teardown.

If cpu_online_mask is used while a hotunplug operation is happening in
parallel, we may leave an offline CPU in cpu_allowed or have a higher
chance of leaving an offline CPU in some other masks.  Avoid this
problem by always using cpu_active_mask in the cpuset code and leave
a comment as to why the use of cpu_online_mask is discouraged.

Signed-off-by: default avatarWaiman Long <longman@redhat.com>
Signed-off-by: default avatarTejun Heo <tj@kernel.org>
parent a9791555
Loading
Loading
Loading
Loading
+23 −9
Original line number Diff line number Diff line
@@ -192,6 +192,20 @@ static inline void notify_partition_change(struct cpuset *cs, int old_prs)
		WRITE_ONCE(cs->prs_err, PERR_NONE);
}

/*
 * The top_cpuset is always synchronized to cpu_active_mask and we should avoid
 * using cpu_online_mask as much as possible. An active CPU is always an online
 * CPU, but not vice versa. cpu_active_mask and cpu_online_mask can differ
 * during hotplug operations. A CPU is marked active at the last stage of CPU
 * bringup (CPUHP_AP_ACTIVE). It is also the stage where cpuset hotplug code
 * will be called to update the sched domains so that the scheduler can move
 * a normal task to a newly active CPU or remove tasks away from a newly
 * inactivated CPU. The online bit is set much earlier in the CPU bringup
 * process and cleared much later in CPU teardown.
 *
 * If cpu_online_mask is used while a hotunplug operation is happening in
 * parallel, we may leave an offline CPU in cpu_allowed or some other masks.
 */
static struct cpuset top_cpuset = {
	.flags = BIT(CS_ONLINE) | BIT(CS_CPU_EXCLUSIVE) |
		 BIT(CS_MEM_EXCLUSIVE) | BIT(CS_SCHED_LOAD_BALANCE),
@@ -355,18 +369,18 @@ static inline bool partition_is_populated(struct cpuset *cs,
 * appropriate cpus.
 *
 * One way or another, we guarantee to return some non-empty subset
 * of cpu_online_mask.
 * of cpu_active_mask.
 *
 * Call with callback_lock or cpuset_mutex held.
 */
static void guarantee_online_cpus(struct task_struct *tsk,
static void guarantee_active_cpus(struct task_struct *tsk,
				  struct cpumask *pmask)
{
	const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
	struct cpuset *cs;

	if (WARN_ON(!cpumask_and(pmask, possible_mask, cpu_online_mask)))
		cpumask_copy(pmask, cpu_online_mask);
	if (WARN_ON(!cpumask_and(pmask, possible_mask, cpu_active_mask)))
		cpumask_copy(pmask, cpu_active_mask);

	rcu_read_lock();
	cs = task_cs(tsk);
@@ -2263,7 +2277,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
	bool force = false;
	int old_prs = cs->partition_root_state;

	/* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
	/* top_cpuset.cpus_allowed tracks cpu_active_mask; it's read-only */
	if (cs == &top_cpuset)
		return -EACCES;

@@ -3082,7 +3096,7 @@ static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
	lockdep_assert_held(&cpuset_mutex);

	if (cs != &top_cpuset)
		guarantee_online_cpus(task, cpus_attach);
		guarantee_active_cpus(task, cpus_attach);
	else
		cpumask_andnot(cpus_attach, task_cpu_possible_mask(task),
			       subpartitions_cpus);
@@ -4026,7 +4040,7 @@ void __init cpuset_init_smp(void)
 *
 * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
 * attached to the specified @tsk.  Guaranteed to return some non-empty
 * subset of cpu_online_mask, even if this means going outside the
 * subset of cpu_active_mask, even if this means going outside the
 * tasks cpuset, except when the task is in the top cpuset.
 **/

@@ -4040,7 +4054,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)

	cs = task_cs(tsk);
	if (cs != &top_cpuset)
		guarantee_online_cpus(tsk, pmask);
		guarantee_active_cpus(tsk, pmask);
	/*
	 * Tasks in the top cpuset won't get update to their cpumasks
	 * when a hotplug online/offline event happens. So we include all
@@ -4054,7 +4068,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
		 * allowable online cpu left, we fall back to all possible cpus.
		 */
		cpumask_andnot(pmask, possible_mask, subpartitions_cpus);
		if (!cpumask_intersects(pmask, cpu_online_mask))
		if (!cpumask_intersects(pmask, cpu_active_mask))
			cpumask_copy(pmask, possible_mask);
	}