Commit de37e502 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'cgroup-for-7.1-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
 "Two rstat fixes:

   - Out-of-bounds access in the css_rstat_updated() BPF kfunc when
     called with an unchecked user-supplied cpu

   - Over-strict NMI guard after the recent switch to try_cmpxchg left
     sparc and ppc64 unable to queue rstat updates from NMI"

* tag 'cgroup-for-7.1-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: rstat: relax NMI guard after switch to try_cmpxchg
  cgroup/rstat: validate cpu before css_rstat_cpu() access
parents 4a5860ea 22572dbc
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -2241,7 +2241,7 @@ void blk_cgroup_bio_start(struct bio *bio)
	}

	u64_stats_update_end_irqrestore(&bis->sync, flags);
	css_rstat_updated(&blkcg->css, cpu);
	__css_rstat_updated(&blkcg->css, cpu);
	put_cpu();
}

+1 −0
Original line number Diff line number Diff line
@@ -777,6 +777,7 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
/*
 * cgroup scalable recursive statistics.
 *
 * __css_rstat_updated() is the internal updater used by in-kernel callers.
 * css_rstat_updated() is the BPF-facing wrapper, which validates @cpu before
 * forwarding to __css_rstat_updated().
 */
void __css_rstat_updated(struct cgroup_subsys_state *css, int cpu);
void css_rstat_updated(struct cgroup_subsys_state *css, int cpu);
void css_rstat_flush(struct cgroup_subsys_state *css);

+23 −14
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup-internal.h"

#include <linux/cpumask.h>
#include <linux/sched/cputime.h>

#include <linux/bpf.h>
@@ -53,7 +54,7 @@ static inline struct llist_head *ss_lhead_cpu(struct cgroup_subsys *ss, int cpu)
}

/**
 * css_rstat_updated - keep track of updated rstat_cpu
 * __css_rstat_updated - keep track of updated rstat_cpu
 * @css: target cgroup subsystem state
 * @cpu: cpu on which rstat_cpu was updated
 *
@@ -63,31 +64,27 @@ static inline struct llist_head *ss_lhead_cpu(struct cgroup_subsys *ss, int cpu)
 *
 * NOTE: if the user needs the guarantee that either the updater adds itself to
 * the lockless list or the concurrent flusher flushes its updated stats, a
 * memory barrier is needed before the call to css_rstat_updated() i.e. a
 * memory barrier is needed before the call to __css_rstat_updated() i.e. a
 * barrier after updating the per-cpu stats and before calling
 * css_rstat_updated().
 * __css_rstat_updated().
 */
__bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
void __css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
{
	struct llist_head *lhead;
	struct css_rstat_cpu *rstatc;
	struct llist_node *self;

	/*
	 * Since bpf programs can call this function, prevent access to
	 * uninitialized rstat pointers.
	 */
	/* Prevent access to uninitialized rstat pointers. */
	if (!css_uses_rstat(css))
		return;

	lockdep_assert_preemption_disabled();

	/*
	 * For archs withnot nmi safe cmpxchg or percpu ops support, ignore
	 * the requests from nmi context.
	 * The lockless insertion below relies on NMI-safe cmpxchg;
	 * bail out in NMI on archs that don't provide it.
	 */
	if ((!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) ||
	     !IS_ENABLED(CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS)) && in_nmi())
	if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && in_nmi())
		return;

	rstatc = css_rstat_cpu(css, cpu);
@@ -125,6 +122,18 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
	llist_add(&rstatc->lnode, lhead);
}

/*
 * BPF-facing wrapper for __css_rstat_updated(). Validate the caller-provided
 * CPU before passing it to the internal rstat updater.
 */
__bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
{
	if (unlikely(cpu < 0 || cpu >= nr_cpu_ids || !cpu_possible(cpu)))
		return;

	__css_rstat_updated(css, cpu);
}

static void __css_process_update_tree(struct cgroup_subsys_state *css, int cpu)
{
	/* put @css and all ancestors on the corresponding updated lists */
@@ -170,7 +179,7 @@ static void css_process_update_tree(struct cgroup_subsys *ss, int cpu)
		 * flusher flush the stats updated by the updater who have
		 * observed that they are already on the list. The
		 * corresponding barrier pair for this one should be before
		 * css_rstat_updated() by the user.
		 * __css_rstat_updated() by the user.
		 *
		 * For now, there aren't any such user, so not adding the
		 * barrier here but if such a use-case arise, please add
@@ -614,7 +623,7 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
						 unsigned long flags)
{
	u64_stats_update_end_irqrestore(&rstatbc->bsync, flags);
	css_rstat_updated(&cgrp->self, smp_processor_id());
	__css_rstat_updated(&cgrp->self, smp_processor_id());
	put_cpu_ptr(rstatbc);
}

+3 −3
Original line number Diff line number Diff line
@@ -679,7 +679,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, long val,
	if (!val)
		return;

	css_rstat_updated(&memcg->css, cpu);
	__css_rstat_updated(&memcg->css, cpu);
	statc_pcpu = memcg->vmstats_percpu;
	for (; statc_pcpu; statc_pcpu = statc->parent_pcpu) {
		statc = this_cpu_ptr(statc_pcpu);
@@ -2796,7 +2796,7 @@ static inline void account_slab_nmi_safe(struct mem_cgroup *memcg,
		struct mem_cgroup_per_node *pn = memcg->nodeinfo[pgdat->node_id];

		/* preemption is disabled in_nmi(). */
		css_rstat_updated(&memcg->css, smp_processor_id());
		__css_rstat_updated(&memcg->css, smp_processor_id());
		if (idx == NR_SLAB_RECLAIMABLE_B)
			atomic_add(nr, &pn->slab_reclaimable);
		else
@@ -3019,7 +3019,7 @@ static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
		mod_memcg_state(memcg, MEMCG_KMEM, val);
	} else {
		/* preemption is disabled in_nmi(). */
		css_rstat_updated(&memcg->css, smp_processor_id());
		__css_rstat_updated(&memcg->css, smp_processor_id());
		atomic_add(val, &memcg->kmem_stat);
	}
}