Commit 9f8413c4 authored by Linus Torvalds
Browse files
Pull cgroup updates from Tejun Heo:

 - Yafang Shao added task_get_cgroup1() helper to enable a similar BPF
   helper so that BPF progs can be more useful on cgroup1 hierarchies.
   While cgroup1 is mostly in maintenance mode, this addition is very
   small while having an outsized usefulness for users who are still on
   cgroup1. Yafang also optimized root cgroup list access by making it
   RCU protected in the process.

 - Waiman Long optimized rstat operation leading to substantially lower
   and more consistent lock hold time while flushing the hierarchical
   statistics. As the lock can be acquired briefly in various hot paths,
   this reduction has cascading benefits.

 - Waiman also improved the quality of isolation for cpuset's isolated
   partitions. CPUs which are allocated to isolated partitions are now
   excluded from running unbound work items and cpu_is_isolated() test
   which is used by vmstat and memcg to reduce interference now includes
   cpuset isolated CPUs. While it isn't there yet, the hope is to
   eventually reach parity with the isolation level provided by the
   `isolcpus` boot param but in a dynamic manner.

* tag 'cgroup-for-6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: Move rcu_head up near the top of cgroup_root
  cgroup/cpuset: Include isolated cpuset CPUs in cpu_is_isolated() check
  cgroup: Avoid false cacheline sharing of read mostly rstat_cpu
  cgroup/rstat: Optimize cgroup_rstat_updated_list()
  cgroup: Fix documentation for cpu.idle
  cgroup/cpuset: Expose cpuset.cpus.isolated
  workqueue: Move workqueue_set_unbound_cpumask() and its helpers inside CONFIG_SYSFS
  cgroup/rstat: Reduce cpu_lock hold time in cgroup_rstat_flush_locked()
  cgroup/cpuset: Take isolated CPUs out of workqueue unbound cpumask
  cgroup/cpuset: Keep track of CPUs in isolated partitions
  selftests/cgroup: Minor code cleanup and reorganization of test_cpuset_prs.sh
  workqueue: Add workqueue_unbound_exclude_cpumask() to exclude CPUs from wq_unbound_cpumask
  selftests: cgroup: Fixes a typo in a comment
  cgroup: Add a new helper for cgroup1 hierarchy
  cgroup: Add annotation for holding namespace_sem in current_cgns_cgroup_from_root()
  cgroup: Eliminate the need for cgroup_mutex in proc_cgroup_show()
  cgroup: Make operations on the cgroup root_list RCU safe
  cgroup: Remove unnecessary list_empty()
parents bfe8eb3b a7fb0423
Loading
Loading
Loading
Loading
+27 −6
Original line number Diff line number Diff line
@@ -1093,7 +1093,11 @@ All time durations are in microseconds.
	A read-write single value file which exists on non-root
	cgroups.  The default is "100".

	The weight in the range [1, 10000].
	For non idle groups (cpu.idle = 0), the weight is in the
	range [1, 10000].

	If the cgroup has been configured to be SCHED_IDLE (cpu.idle = 1),
	then the weight will show as a 0.

  cpu.weight.nice
	A read-write single value file which exists on non-root
@@ -1157,6 +1161,16 @@ All time durations are in microseconds.
        values similar to the sched_setattr(2). This maximum utilization
        value is used to clamp the task specific maximum utilization clamp.

  cpu.idle
	A read-write single value file which exists on non-root cgroups.
	The default is 0.

	This is the cgroup analog of the per-task SCHED_IDLE sched policy.
	Setting this value to a 1 will make the scheduling policy of the
	cgroup SCHED_IDLE. The threads inside the cgroup will retain their
	own relative priorities, but the cgroup itself will be treated as
	very low priority relative to its peers.



Memory
@@ -2316,6 +2330,13 @@ Cpuset Interface Files
	treated to have an implicit value of "cpuset.cpus" in the
	formation of local partition.

  cpuset.cpus.isolated
	A read-only multiple values file which exists only on the
	root cgroup.

	This file shows the set of all isolated CPUs used in existing
	isolated partitions. It will be empty if no isolated partition
	is created.

  cpuset.cpus.partition
	A read-write single value file which exists on non-root
	cpuset-enabled cgroups.  This flag is owned by the parent cgroup
@@ -2358,11 +2379,11 @@ Cpuset Interface Files
	partition or scheduling domain.  The set of exclusive CPUs is
	determined by the value of its "cpuset.cpus.exclusive.effective".

	When set to "isolated", the CPUs in that partition will
	be in an isolated state without any load balancing from the
	scheduler.  Tasks placed in such a partition with multiple
	CPUs should be carefully distributed and bound to each of the
	individual CPUs for optimal performance.
	When set to "isolated", the CPUs in that partition will be in
	an isolated state without any load balancing from the scheduler
	and excluded from the unbound workqueues.  Tasks placed in such
	a partition with multiple CPUs should be carefully distributed
	and bound to each of the individual CPUs for optimal performance.

	A partition root ("root" or "isolated") can be in one of the
	two possible states - valid or invalid.  An invalid partition
+18 −3
Original line number Diff line number Diff line
@@ -496,6 +496,20 @@ struct cgroup {
	struct cgroup_rstat_cpu __percpu *rstat_cpu;
	struct list_head rstat_css_list;

	/*
	 * Add padding to separate the read mostly rstat_cpu and
	 * rstat_css_list into a different cacheline from the following
	 * rstat_flush_next and *bstat fields which can have frequent updates.
	 */
	CACHELINE_PADDING(_pad_);

	/*
	 * A singly-linked list of cgroup structures to be rstat flushed.
	 * This is a scratch field to be used exclusively by
	 * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock.
	 */
	struct cgroup	*rstat_flush_next;

	/* cgroup basic resource statistics */
	struct cgroup_base_stat last_bstat;
	struct cgroup_base_stat bstat;
@@ -548,6 +562,10 @@ struct cgroup_root {
	/* Unique id for this hierarchy. */
	int hierarchy_id;

	/* A list running through the active hierarchies */
	struct list_head root_list;
	struct rcu_head rcu;	/* Must be near the top */

	/*
	 * The root cgroup. The containing cgroup_root will be destroyed on its
	 * release. cgrp->ancestors[0] will be used overflowing into the
@@ -561,9 +579,6 @@ struct cgroup_root {
	/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
	atomic_t nr_cgrps;

	/* A list running through the active hierarchies */
	struct list_head root_list;

	/* Hierarchy-specific flags */
	unsigned int flags;

+3 −1
Original line number Diff line number Diff line
@@ -69,6 +69,7 @@ struct css_task_iter {
extern struct file_system_type cgroup_fs_type;
extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;
extern spinlock_t css_set_lock;

#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
#include <linux/cgroup_subsys.h>
@@ -386,7 +387,6 @@ static inline void cgroup_unlock(void)
 * as locks used during the cgroup_subsys::attach() methods.
 */
#ifdef CONFIG_PROVE_RCU
extern spinlock_t css_set_lock;
#define task_css_set_check(task, __c)					\
	rcu_dereference_check((task)->cgroups,				\
		rcu_read_lock_sched_held() ||				\
@@ -853,4 +853,6 @@ static inline void cgroup_bpf_put(struct cgroup *cgrp) {}

#endif /* CONFIG_CGROUP_BPF */

struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id);

#endif /* _LINUX_CGROUP_H */
+6 −0
Original line number Diff line number Diff line
@@ -77,6 +77,7 @@ extern void cpuset_lock(void);
extern void cpuset_unlock(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
extern bool cpuset_cpus_allowed_fallback(struct task_struct *p);
extern bool cpuset_cpu_is_isolated(int cpu);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
#define cpuset_current_mems_allowed (current->mems_allowed)
void cpuset_init_current_mems_allowed(void);
@@ -207,6 +208,11 @@ static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p)
	return false;
}

/*
 * Stub variant of cpuset_cpu_is_isolated(): ignores @cpu and always
 * reports "not isolated".
 *
 * NOTE(review): presumably this is the fallback used when cpuset support
 * is compiled out (the extern prototype appears in an earlier hunk of the
 * same header) -- confirm against the surrounding #ifdef.
 */
static inline bool cpuset_cpu_is_isolated(int cpu)
{
	return false;
}

static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
{
	return node_possible_map;
+3 −1
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
#define _LINUX_SCHED_ISOLATION_H

#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/init.h>
#include <linux/tick.h>

@@ -67,7 +68,8 @@ static inline bool housekeeping_cpu(int cpu, enum hk_type type)
/*
 * Report whether @cpu should be treated as isolated: true when the CPU
 * fails housekeeping_test_cpu() for HK_TYPE_DOMAIN or for HK_TYPE_TICK,
 * or when cpuset_cpu_is_isolated() reports it as isolated.
 *
 * NOTE(review): this hunk is a flattened diff ("+3 -1") with the +/-
 * markers lost. The ';'-terminated HK_TYPE_TICK line appears to be the
 * removed pre-change line, and the two lines following it its
 * replacement -- confirm against the actual tree before reading this
 * as compilable code.
 */
static inline bool cpu_is_isolated(int cpu)
{
	return !housekeeping_test_cpu(cpu, HK_TYPE_DOMAIN) ||
		 !housekeeping_test_cpu(cpu, HK_TYPE_TICK);
	       !housekeeping_test_cpu(cpu, HK_TYPE_TICK) ||
	       cpuset_cpu_is_isolated(cpu);
}

#endif /* _LINUX_SCHED_ISOLATION_H */
Loading