Commit 03ff7351 authored by Frederic Weisbecker
Browse files

cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset



Until now, HK_TYPE_DOMAIN used to only include boot defined isolated
CPUs passed through isolcpus= boot option. Users interested in also
knowing the runtime defined isolated CPUs through cpuset must use
different APIs: cpuset_cpu_is_isolated(), cpu_is_isolated(), etc...

There are many drawbacks to that approach:

1) Most interested subsystems want to know about all isolated CPUs, not
  just those defined at boot time.

2) cpuset_cpu_is_isolated() / cpu_is_isolated() are not synchronized with
  concurrent cpuset changes.

3) Further cpuset modifications are not propagated to subsystems.

Solve 1) and 2) and centralize all isolated CPUs within the
HK_TYPE_DOMAIN housekeeping cpumask.

Subsystems can rely on RCU to synchronize against concurrent changes.

The propagation mentioned in 3) will be handled in further patches.

[Chen Ridong: Fix cpu_hotplug_lock deadlock and use correct static
branch API]

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Chen Ridong <chenridong@huawei.com>
Signed-off-by: Chen Ridong <chenridong@huawei.com>
Cc: "Michal Koutný" <mkoutny@suse.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Marco Crivellari <marco.crivellari@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Waiman Long <longman@redhat.com>
Cc: cgroups@vger.kernel.org
parent 27c3a596
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -9,6 +9,11 @@
enum hk_type {
	/* Inverse of boot-time isolcpus= argument */
	HK_TYPE_DOMAIN_BOOT,
	/*
	 * Same as HK_TYPE_DOMAIN_BOOT but also includes the
	 * inverse of cpuset isolated partitions. As such it
	 * is always a subset of HK_TYPE_DOMAIN_BOOT.
	 */
	HK_TYPE_DOMAIN,
	/* Inverse of boot-time isolcpus=managed_irq argument */
	HK_TYPE_MANAGED_IRQ,
@@ -35,6 +40,7 @@ extern const struct cpumask *housekeeping_cpumask(enum hk_type type);
extern bool housekeeping_enabled(enum hk_type type);
extern void housekeeping_affine(struct task_struct *t, enum hk_type type);
extern bool housekeeping_test_cpu(int cpu, enum hk_type type);
/*
 * Recompute the HK_TYPE_DOMAIN cpumask from @isol_mask (the set of CPUs to
 * exclude). Returns 0 on success or a negative errno on failure.
 */
extern int housekeeping_update(struct cpumask *isol_mask);
extern void __init housekeeping_init(void);

#else
@@ -62,6 +68,7 @@ static inline bool housekeeping_test_cpu(int cpu, enum hk_type type)
	return true;
}

/*
 * Stubs for the #else branch of the CONFIG_CPU_ISOLATION conditional:
 * housekeeping_update() does nothing and reports success, and
 * housekeeping_init() is empty.
 */
static inline int housekeeping_update(struct cpumask *isol_mask) { return 0; }
static inline void housekeeping_init(void) { }
#endif /* CONFIG_CPU_ISOLATION */

+3 −2
Original line number Diff line number Diff line
@@ -1482,14 +1482,15 @@ static void update_isolation_cpumasks(void)
	if (!isolated_cpus_updating)
		return;

	lockdep_assert_cpus_held();

	ret = workqueue_unbound_exclude_cpumask(isolated_cpus);
	WARN_ON_ONCE(ret < 0);

	ret = tmigr_isolated_exclude_cpumask(isolated_cpus);
	WARN_ON_ONCE(ret < 0);

	ret = housekeeping_update(isolated_cpus);
	WARN_ON_ONCE(ret < 0);

	isolated_cpus_updating = false;
}

+69 −6
Original line number Diff line number Diff line
@@ -29,18 +29,48 @@ static struct housekeeping housekeeping;

/*
 * Report whether the bit for @type is set in housekeeping.flags.
 *
 * NOTE(review): this listing looks like a rendered diff whose +/- markers were
 * stripped, so the two return statements below are presumably the removed and
 * the added version of the same line (the latter reading the flags through
 * READ_ONCE()) -- confirm against the real tree.
 */
bool housekeeping_enabled(enum hk_type type)
{
	return !!(housekeeping.flags & BIT(type));
	return !!(READ_ONCE(housekeeping.flags) & BIT(type));
}
EXPORT_SYMBOL_GPL(housekeeping_enabled);

/*
 * Condition passed to the RCU dereference check performed by
 * housekeeping_cpumask_dereference().
 *
 * Returns true unconditionally when CONFIG_LOCKDEP is disabled or when @type
 * is anything other than HK_TYPE_DOMAIN. For HK_TYPE_DOMAIN under lockdep it
 * returns true only in one of the three situations tested below, and false
 * otherwise.
 */
static bool housekeeping_dereference_check(enum hk_type type)
{
	if (IS_ENABLED(CONFIG_LOCKDEP) && type == HK_TYPE_DOMAIN) {
		/* Cpuset isn't even writable yet? */
		if (system_state <= SYSTEM_SCHEDULING)
			return true;

		/* CPU hotplug write locked, so cpuset partition can't be overwritten */
		if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held())
			return true;

		/* Cpuset lock held, partitions not writable */
		if (IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held())
			return true;

		/* None of the accepted protections is in place */
		return false;
	}

	return true;
}

/*
 * Fetch the cpumask pointer stored in housekeeping.cpumasks[@type] through
 * rcu_dereference_all_check(), supplying housekeeping_dereference_check(@type)
 * as the extra condition under which the access is considered legitimate.
 */
static inline struct cpumask *housekeeping_cpumask_dereference(enum hk_type type)
{
	return rcu_dereference_all_check(housekeeping.cpumasks[type],
					 housekeeping_dereference_check(type));
}

/*
 * Return the cpumask associated with @type.
 *
 * When the housekeeping_overridden static key is on and the flag for @type is
 * set, the mask stored in housekeeping.cpumasks[@type] is looked up; in every
 * other case, and when the looked-up pointer is NULL, cpu_possible_mask is
 * returned instead.
 *
 * NOTE(review): this listing looks like a rendered diff whose +/- markers were
 * stripped. The braced "housekeeping.flags & BIT(type)" test with its direct
 * rcu_dereference_check() return, and the bare "return cpu_possible_mask;",
 * are presumably the removed lines; the READ_ONCE()/mask based lines are
 * presumably their replacements -- confirm against the real tree.
 */
const struct cpumask *housekeeping_cpumask(enum hk_type type)
{
	const struct cpumask *mask = NULL;

	if (static_branch_unlikely(&housekeeping_overridden)) {
		if (housekeeping.flags & BIT(type)) {
			return rcu_dereference_check(housekeeping.cpumasks[type], 1);
		}
		if (READ_ONCE(housekeeping.flags) & BIT(type))
			mask = housekeeping_cpumask_dereference(type);
	}
	return cpu_possible_mask;
	/* Flag clear, key off, or no mask stored: every possible CPU qualifies */
	if (!mask)
		mask = cpu_possible_mask;
	return mask;
}
EXPORT_SYMBOL_GPL(housekeeping_cpumask);

@@ -80,12 +110,45 @@ EXPORT_SYMBOL_GPL(housekeeping_affine);

/*
 * Tell whether @cpu belongs to the housekeeping set of @type.
 *
 * Returns true when the housekeeping_overridden static key is off or the flag
 * for @type is clear; otherwise returns whether @cpu is set in
 * housekeeping_cpumask(@type).
 *
 * NOTE(review): this listing looks like a rendered diff whose +/- markers were
 * stripped, so the single-line "if" and the two-line "if" using READ_ONCE()
 * are presumably the removed and added form of the same condition -- confirm
 * against the real tree.
 */
bool housekeeping_test_cpu(int cpu, enum hk_type type)
{
	if (static_branch_unlikely(&housekeeping_overridden) && housekeeping.flags & BIT(type))
	if (static_branch_unlikely(&housekeeping_overridden) &&
	    READ_ONCE(housekeeping.flags) & BIT(type))
		return cpumask_test_cpu(cpu, housekeeping_cpumask(type));
	return true;
}
EXPORT_SYMBOL_GPL(housekeeping_test_cpu);

/*
 * Replace the HK_TYPE_DOMAIN cpumask with
 * housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT) minus @isol_mask.
 *
 * The caller is expected to hold the CPU hotplug lock; this is asserted via
 * lockdep_assert_cpus_held().
 *
 * Returns 0 on success, -ENOMEM if the new mask cannot be allocated, or
 * -EINVAL if the resulting mask would not contain any online CPU. On failure
 * the currently installed mask is left untouched.
 */
int housekeeping_update(struct cpumask *isol_mask)
{
	struct cpumask *trial, *old = NULL;

	lockdep_assert_cpus_held();

	trial = kmalloc(cpumask_size(), GFP_KERNEL);
	if (!trial)
		return -ENOMEM;

	/* Candidate mask: boot-time domain housekeeping CPUs not in @isol_mask */
	cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), isol_mask);
	/* Refuse a result that shares no CPU with cpu_online_mask */
	if (!cpumask_intersects(trial, cpu_online_mask)) {
		kfree(trial);
		return -EINVAL;
	}

	/* No housekeeping flag set so far: switch the static key on */
	if (!housekeeping.flags)
		static_branch_enable_cpuslocked(&housekeeping_overridden);

	/*
	 * If a HK_TYPE_DOMAIN mask is already flagged, remember it so it can be
	 * freed once it is no longer published; otherwise set HK_FLAG_DOMAIN.
	 * NOTE(review): in the latter case the flag becomes visible before the
	 * pointer is assigned; readers presumably rely on the NULL fallback in
	 * housekeeping_cpumask() -- confirm.
	 */
	if (housekeeping.flags & HK_FLAG_DOMAIN)
		old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN);
	else
		WRITE_ONCE(housekeeping.flags, housekeeping.flags | HK_FLAG_DOMAIN);
	rcu_assign_pointer(housekeeping.cpumasks[HK_TYPE_DOMAIN], trial);

	/* Let pre-existing RCU readers finish before the old mask is freed */
	synchronize_rcu();

	kfree(old);

	return 0;
}

void __init housekeeping_init(void)
{
	enum hk_type type;
+1 −0
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@
#include <linux/context_tracking.h>
#include <linux/cpufreq.h>
#include <linux/cpumask_api.h>
#include <linux/cpuset.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/fs_api.h>