Commit 11e5f407 authored by Waiman Long's avatar Waiman Long Committed by Tejun Heo
Browse files

cgroup/cpuset: Keep track of CPUs in isolated partitions



Add a new internal isolated_cpus mask to keep track of the CPUs that are in
isolated partitions. Expose that new cpumask as a new root-only control file
".cpuset.cpus.isolated".

tj: Updated patch description to reflect dropping __DEBUG__ prefix.

Signed-off-by: default avatarWaiman Long <longman@redhat.com>
Signed-off-by: default avatarTejun Heo <tj@kernel.org>
parent 14060dfc
Loading
Loading
Loading
Loading
+127 −63
Original line number Diff line number Diff line
@@ -204,6 +204,11 @@ struct cpuset {
 */
static cpumask_var_t	subpartitions_cpus;

/*
 * Exclusive CPUs in isolated partitions
 */
static cpumask_var_t	isolated_cpus;

/* List of remote partition root children */
static struct list_head remote_children;

@@ -1317,6 +1322,7 @@ static void compute_effective_cpumask(struct cpumask *new_cpus,
 */
enum partition_cmd {
	partcmd_enable,		/* Enable partition root	  */
	partcmd_enablei,	/* Enable isolated partition root */
	partcmd_disable,	/* Disable partition root	  */
	partcmd_update,		/* Update parent's effective_cpus */
	partcmd_invalidate,	/* Make partition invalid	  */
@@ -1418,6 +1424,74 @@ static void reset_partition_data(struct cpuset *cs)
	}
}

/*
 * partition_xcpus_newstate - Exclusive CPUs state change
 * @old_prs: old partition_root_state
 * @new_prs: new partition_root_state
 * @xcpus: exclusive CPUs with state change
 */
static void partition_xcpus_newstate(int old_prs, int new_prs, struct cpumask *xcpus)
{
	WARN_ON_ONCE(old_prs == new_prs);
	if (new_prs == PRS_ISOLATED)
		cpumask_or(isolated_cpus, isolated_cpus, xcpus);
	else
		cpumask_andnot(isolated_cpus, isolated_cpus, xcpus);
}

/*
 * partition_xcpus_add - Add new exclusive CPUs to partition
 * @new_prs: new partition_root_state
 * @parent: parent cpuset
 * @xcpus: exclusive CPUs to be added
 *
 * Remote partition if parent == NULL
 */
static void partition_xcpus_add(int new_prs, struct cpuset *parent,
				struct cpumask *xcpus)
{
	WARN_ON_ONCE(new_prs < 0);
	lockdep_assert_held(&callback_lock);
	if (!parent)
		parent = &top_cpuset;

	if (parent == &top_cpuset)
		cpumask_or(subpartitions_cpus, subpartitions_cpus, xcpus);

	if (new_prs != parent->partition_root_state)
		partition_xcpus_newstate(parent->partition_root_state, new_prs,
					 xcpus);

	cpumask_andnot(parent->effective_cpus, parent->effective_cpus, xcpus);
}

/*
 * partition_xcpus_del - Remove exclusive CPUs from partition
 * @old_prs: old partition_root_state
 * @parent: parent cpuset
 * @xcpus: exclusive CPUs to be removed
 *
 * Remote partition if parent == NULL
 */
static void partition_xcpus_del(int old_prs, struct cpuset *parent,
				struct cpumask *xcpus)
{
	WARN_ON_ONCE(old_prs < 0);
	lockdep_assert_held(&callback_lock);
	if (!parent)
		parent = &top_cpuset;

	if (parent == &top_cpuset)
		cpumask_andnot(subpartitions_cpus, subpartitions_cpus, xcpus);

	if (old_prs != parent->partition_root_state)
		partition_xcpus_newstate(old_prs, parent->partition_root_state,
					 xcpus);

	cpumask_and(xcpus, xcpus, cpu_active_mask);
	cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus);
}

/*
 * compute_effective_exclusive_cpumask - compute effective exclusive CPUs
 * @cs: cpuset
@@ -1456,13 +1530,15 @@ static inline bool is_local_partition(struct cpuset *cs)
/*
 * remote_partition_enable - Enable current cpuset as a remote partition root
 * @cs: the cpuset to update
 * @new_prs: new partition_root_state
 * @tmp: temparary masks
 * Return: 1 if successful, 0 if error
 *
 * Enable the current cpuset to become a remote partition root taking CPUs
 * directly from the top cpuset. cpuset_mutex must be held by the caller.
 */
static int remote_partition_enable(struct cpuset *cs, struct tmpmasks *tmp)
static int remote_partition_enable(struct cpuset *cs, int new_prs,
				   struct tmpmasks *tmp)
{
	/*
	 * The user must have sysadmin privilege.
@@ -1485,18 +1561,14 @@ static int remote_partition_enable(struct cpuset *cs, struct tmpmasks *tmp)
		return 0;

	spin_lock_irq(&callback_lock);
	cpumask_andnot(top_cpuset.effective_cpus,
		       top_cpuset.effective_cpus, tmp->new_cpus);
	cpumask_or(subpartitions_cpus,
		   subpartitions_cpus, tmp->new_cpus);

	partition_xcpus_add(new_prs, NULL, tmp->new_cpus);
	list_add(&cs->remote_sibling, &remote_children);
	if (cs->use_parent_ecpus) {
		struct cpuset *parent = parent_cs(cs);

		cs->use_parent_ecpus = false;
		parent->child_ecpus_count--;
	}
	list_add(&cs->remote_sibling, &remote_children);
	spin_unlock_irq(&callback_lock);

	/*
@@ -1524,13 +1596,8 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
	WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, subpartitions_cpus));

	spin_lock_irq(&callback_lock);
	cpumask_andnot(subpartitions_cpus,
		       subpartitions_cpus, tmp->new_cpus);
	cpumask_and(tmp->new_cpus,
		    tmp->new_cpus, cpu_active_mask);
	cpumask_or(top_cpuset.effective_cpus,
		   top_cpuset.effective_cpus, tmp->new_cpus);
	list_del_init(&cs->remote_sibling);
	partition_xcpus_del(cs->partition_root_state, NULL, tmp->new_cpus);
	cs->partition_root_state = -cs->partition_root_state;
	if (!cs->prs_err)
		cs->prs_err = PERR_INVCPUS;
@@ -1557,6 +1624,7 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
			       struct tmpmasks *tmp)
{
	bool adding, deleting;
	int prs = cs->partition_root_state;

	if (WARN_ON_ONCE(!is_remote_partition(cs)))
		return;
@@ -1580,20 +1648,10 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
		goto invalidate;

	spin_lock_irq(&callback_lock);
	if (adding) {
		cpumask_or(subpartitions_cpus,
			   subpartitions_cpus, tmp->addmask);
		cpumask_andnot(top_cpuset.effective_cpus,
			       top_cpuset.effective_cpus, tmp->addmask);
	}
	if (deleting) {
		cpumask_andnot(subpartitions_cpus,
			       subpartitions_cpus, tmp->delmask);
		cpumask_and(tmp->delmask,
			    tmp->delmask, cpu_active_mask);
		cpumask_or(top_cpuset.effective_cpus,
			   top_cpuset.effective_cpus, tmp->delmask);
	}
	if (adding)
		partition_xcpus_add(prs, NULL, tmp->addmask);
	if (deleting)
		partition_xcpus_del(prs, NULL, tmp->delmask);
	spin_unlock_irq(&callback_lock);

	/*
@@ -1676,11 +1734,11 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
 * @tmp:     Temporary addmask and delmask
 * Return:   0 or a partition root state error code
 *
 * For partcmd_enable, the cpuset is being transformed from a non-partition
 * root to a partition root. The effective_xcpus (cpus_allowed if effective_xcpus
 * not set) mask of the given cpuset will be taken away from parent's
 * effective_cpus. The function will return 0 if all the CPUs listed in
 * effective_xcpus can be granted or an error code will be returned.
 * For partcmd_enable*, the cpuset is being transformed from a non-partition
 * root to a partition root. The effective_xcpus (cpus_allowed if
 * effective_xcpus not set) mask of the given cpuset will be taken away from
 * parent's effective_cpus. The function will return 0 if all the CPUs listed
 * in effective_xcpus can be granted or an error code will be returned.
 *
 * For partcmd_disable, the cpuset is being transformed from a partition
 * root back to a non-partition root. Any CPUs in effective_xcpus will be
@@ -1695,7 +1753,7 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
 *
 * For partcmd_invalidate, the current partition will be made invalid.
 *
 * The partcmd_enable and partcmd_disable commands are used by
 * The partcmd_enable* and partcmd_disable commands are used by
 * update_prstate(). An error code may be returned and the caller will check
 * for error.
 *
@@ -1760,7 +1818,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,

	nocpu = tasks_nocpu_error(parent, cs, xcpus);

	if (cmd == partcmd_enable) {
	if ((cmd == partcmd_enable) || (cmd == partcmd_enablei)) {
		/*
		 * Enabling partition root is not allowed if its
		 * effective_xcpus is empty or doesn't overlap with
@@ -1783,6 +1841,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
		cpumask_copy(tmp->delmask, xcpus);
		deleting = true;
		subparts_delta++;
		new_prs = (cmd == partcmd_enable) ? PRS_ROOT : PRS_ISOLATED;
	} else if (cmd == partcmd_disable) {
		/*
		 * May need to add cpus to parent's effective_cpus for
@@ -1792,6 +1851,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
			  cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus);
		if (adding)
			subparts_delta--;
		new_prs = PRS_MEMBER;
	} else if (newmask) {
		/*
		 * Empty cpumask is not allowed
@@ -1940,37 +2000,24 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
	 * newly deleted ones will be added back to effective_cpus.
	 */
	spin_lock_irq(&callback_lock);
	if (adding) {
		if (parent == &top_cpuset)
			cpumask_andnot(subpartitions_cpus,
				       subpartitions_cpus, tmp->addmask);
	if (old_prs != new_prs) {
		cs->partition_root_state = new_prs;
		if (new_prs <= 0)
			cs->nr_subparts = 0;
	}
	/*
		 * Some of the CPUs in effective_xcpus might have been offlined.
	 * Adding to parent's effective_cpus means deletion CPUs from cs
	 * and vice versa.
	 */
		cpumask_or(parent->effective_cpus,
			   parent->effective_cpus, tmp->addmask);
		cpumask_and(parent->effective_cpus,
			    parent->effective_cpus, cpu_active_mask);
	}
	if (deleting) {
		if (parent == &top_cpuset)
			cpumask_or(subpartitions_cpus,
				   subpartitions_cpus, tmp->delmask);
		cpumask_andnot(parent->effective_cpus,
			       parent->effective_cpus, tmp->delmask);
	}
	if (adding)
		partition_xcpus_del(old_prs, parent, tmp->addmask);
	if (deleting)
		partition_xcpus_add(new_prs, parent, tmp->delmask);

	if (is_partition_valid(parent)) {
		parent->nr_subparts += subparts_delta;
		WARN_ON_ONCE(parent->nr_subparts < 0);
	}

	if (old_prs != new_prs) {
		cs->partition_root_state = new_prs;
		if (new_prs <= 0)
			cs->nr_subparts = 0;
	}

	spin_unlock_irq(&callback_lock);

	if ((old_prs != new_prs) && (cmd == partcmd_update))
@@ -2948,6 +2995,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
	int err = PERR_NONE, old_prs = cs->partition_root_state;
	struct cpuset *parent = parent_cs(cs);
	struct tmpmasks tmpmask;
	bool new_xcpus_state = false;

	if (old_prs == new_prs)
		return 0;
@@ -2977,6 +3025,9 @@ static int update_prstate(struct cpuset *cs, int new_prs)
		goto out;

	if (!old_prs) {
		enum partition_cmd cmd = (new_prs == PRS_ROOT)
				       ? partcmd_enable : partcmd_enablei;

		/*
		 * cpus_allowed cannot be empty.
		 */
@@ -2985,19 +3036,18 @@ static int update_prstate(struct cpuset *cs, int new_prs)
			goto out;
		}

		err = update_parent_effective_cpumask(cs, partcmd_enable,
						      NULL, &tmpmask);
		err = update_parent_effective_cpumask(cs, cmd, NULL, &tmpmask);
		/*
		 * If an attempt to become local partition root fails,
		 * try to become a remote partition root instead.
		 */
		if (err && remote_partition_enable(cs, &tmpmask))
		if (err && remote_partition_enable(cs, new_prs, &tmpmask))
			err = 0;
	} else if (old_prs && new_prs) {
		/*
		 * A change in load balance state only, no change in cpumasks.
		 */
		;
		new_xcpus_state = true;
	} else {
		/*
		 * Switching back to member is always allowed even if it
@@ -3029,6 +3079,8 @@ static int update_prstate(struct cpuset *cs, int new_prs)
	WRITE_ONCE(cs->prs_err, err);
	if (!is_partition_valid(cs))
		reset_partition_data(cs);
	else if (new_xcpus_state)
		partition_xcpus_newstate(old_prs, new_prs, cs->effective_xcpus);
	spin_unlock_irq(&callback_lock);

	/* Force update if switching back to member */
@@ -3386,6 +3438,7 @@ typedef enum {
	FILE_SUBPARTS_CPULIST,
	FILE_EXCLUSIVE_CPULIST,
	FILE_EFFECTIVE_XCPULIST,
	FILE_ISOLATED_CPULIST,
	FILE_CPU_EXCLUSIVE,
	FILE_MEM_EXCLUSIVE,
	FILE_MEM_HARDWALL,
@@ -3582,6 +3635,9 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
	case FILE_SUBPARTS_CPULIST:
		seq_printf(sf, "%*pbl\n", cpumask_pr_args(subpartitions_cpus));
		break;
	case FILE_ISOLATED_CPULIST:
		seq_printf(sf, "%*pbl\n", cpumask_pr_args(isolated_cpus));
		break;
	default:
		ret = -EINVAL;
	}
@@ -3875,6 +3931,13 @@ static struct cftype dfl_files[] = {
		.flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_DEBUG,
	},

	{
		.name = "cpus.isolated",
		.seq_show = cpuset_common_seq_show,
		.private = FILE_ISOLATED_CPULIST,
		.flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_DEBUG,
	},

	{ }	/* terminate */
};

@@ -4194,6 +4257,7 @@ int __init cpuset_init(void)
	BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_xcpus, GFP_KERNEL));
	BUG_ON(!alloc_cpumask_var(&top_cpuset.exclusive_cpus, GFP_KERNEL));
	BUG_ON(!zalloc_cpumask_var(&subpartitions_cpus, GFP_KERNEL));
	BUG_ON(!zalloc_cpumask_var(&isolated_cpus, GFP_KERNEL));

	cpumask_setall(top_cpuset.cpus_allowed);
	nodes_setall(top_cpuset.mems_allowed);