slab: simplify kmalloc_nolock() (073d5f15) · Commits · git / linux-net

mm/slab.h

+0 −1

Original line number	Diff line number	Diff line
		@@ -190,7 +190,6 @@ struct kmem_cache_order_objects {
		*/
		struct kmem_cache {
		struct kmem_cache_cpu __percpu *cpu_slab;
		struct lock_class_key lock_key;
		struct slub_percpu_sheaves __percpu *cpu_sheaves;
		/* Used for retrieving partial slabs, etc. */
		slab_flags_t flags;

mm/slub.c

+29 −115

Original line number	Diff line number	Diff line
		@@ -3690,29 +3690,12 @@ static inline unsigned int init_tid(int cpu)

		static void init_kmem_cache_cpus(struct kmem_cache *s)
		{
		#ifdef CONFIG_PREEMPT_RT
		/*
		* Register lockdep key for non-boot kmem caches to avoid
		* WARN_ON_ONCE(static_obj(key))) in lockdep_register_key()
		*/
		bool finegrain_lockdep = !init_section_contains(s, 1);
		#else
		/*
		* Don't bother with different lockdep classes for each
		* kmem_cache, since we only use local_trylock_irqsave().
		*/
		bool finegrain_lockdep = false;
		#endif
		int cpu;
		struct kmem_cache_cpu *c;

		if (finegrain_lockdep)
		lockdep_register_key(&s->lock_key);
		for_each_possible_cpu(cpu) {
		c = per_cpu_ptr(s->cpu_slab, cpu);
		local_trylock_init(&c->lock);
		if (finegrain_lockdep)
		lockdep_set_class(&c->lock, &s->lock_key);
		c->tid = init_tid(cpu);
		}
		}
		@@ -3799,47 +3782,6 @@ static void deactivate_slab(struct kmem_cache s, struct slab slab,
		}
		}

		/*
		* ___slab_alloc()'s caller is supposed to check if kmem_cache::kmem_cache_cpu::lock
		* can be acquired without a deadlock before invoking the function.
		*
		* Without LOCKDEP we trust the code to be correct. kmalloc_nolock() is
		* using local_lock_is_locked() properly before calling local_lock_cpu_slab(),
		* and kmalloc() is not used in an unsupported context.
		*
		* With LOCKDEP, on PREEMPT_RT lockdep does its checking in local_lock_irqsave().
		* On !PREEMPT_RT we use trylock to avoid false positives in NMI, but
		* lockdep_assert() will catch a bug in case:
		* #1
		* kmalloc() -> ___slab_alloc() -> irqsave -> NMI -> bpf -> kmalloc_nolock()
		* or
		* #2
		* kmalloc() -> ___slab_alloc() -> irqsave -> tracepoint/kprobe -> bpf -> kmalloc_nolock()
		*
		* On PREEMPT_RT an invocation is not possible from IRQ-off or preempt
		* disabled context. The lock will always be acquired and if needed it
		* block and sleep until the lock is available.
		* #1 is possible in !PREEMPT_RT only.
		* #2 is possible in both with a twist that irqsave is replaced with rt_spinlock:
		* kmalloc() -> ___slab_alloc() -> rt_spin_lock(kmem_cache_A) ->
		* tracepoint/kprobe -> bpf -> kmalloc_nolock() -> rt_spin_lock(kmem_cache_B)
		*
		* local_lock_is_locked() prevents the case kmem_cache_A == kmem_cache_B
		*/
		#if defined(CONFIG_PREEMPT_RT) \|\| !defined(CONFIG_LOCKDEP)
		#define local_lock_cpu_slab(s, flags) \
		local_lock_irqsave(&(s)->cpu_slab->lock, flags)
		#else
		#define local_lock_cpu_slab(s, flags) \
		do { \
		bool __l = local_trylock_irqsave(&(s)->cpu_slab->lock, flags); \
		lockdep_assert(__l); \
		} while (0)
		#endif

		#define local_unlock_cpu_slab(s, flags) \
		local_unlock_irqrestore(&(s)->cpu_slab->lock, flags)

		static inline void flush_slab(struct kmem_cache s, struct kmem_cache_cpu c)
		{
		unsigned long flags;
		@@ -4405,20 +4347,6 @@ static void ___slab_alloc(struct kmem_cache s, gfp_t gfpflags, int node,
		return object;
		}

		/*
		* We disallow kprobes in ___slab_alloc() to prevent reentrance
		*
		* kmalloc() -> ___slab_alloc() -> local_lock_cpu_slab() protected part of
		* ___slab_alloc() manipulating c->freelist -> kprobe -> bpf ->
		* kmalloc_nolock() or kfree_nolock() -> __update_cpu_freelist_fast()
		* manipulating c->freelist without lock.
		*
		* This does not prevent kprobe in functions called from ___slab_alloc() such as
		* local_lock_irqsave() itself, and that is fine, we only need to protect the
		* c->freelist manipulation in ___slab_alloc() itself.
		*/
		NOKPROBE_SYMBOL(___slab_alloc);

		static __always_inline void __slab_alloc_node(struct kmem_cache s,
		gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
		{
		@@ -5259,13 +5187,13 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
		if (unlikely(!size))
		return ZERO_SIZE_PTR;

		if (IS_ENABLED(CONFIG_PREEMPT_RT) && !preemptible())
		/*
		* kmalloc_nolock() in PREEMPT_RT is not supported from
		* non-preemptible context because local_lock becomes a
		* sleeping lock on RT.
		* See the comment for the same check in
		* alloc_frozen_pages_nolock_noprof()
		*/
		if (IS_ENABLED(CONFIG_PREEMPT_RT) && (in_nmi() \|\| in_hardirq()))
		return NULL;

		retry:
		if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
		return NULL;
		@@ -5274,10 +5202,11 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
		if (!(s->flags & __CMPXCHG_DOUBLE) && !kmem_cache_debug(s))
		/*
		* kmalloc_nolock() is not supported on architectures that
		* don't implement cmpxchg16b, but debug caches don't use
		* per-cpu slab and per-cpu partial slabs. They rely on
		* kmem_cache_node->list_lock, so kmalloc_nolock() can
		* attempt to allocate from debug caches by
		* don't implement cmpxchg16b and thus need slab_lock()
		* which could be preempted by a nmi.
		* But debug caches don't use that and only rely on
		* kmem_cache_node->list_lock, so kmalloc_nolock() can attempt
		* to allocate from debug caches by
		* spin_trylock_irqsave(&n->list_lock, ...)
		*/
		return NULL;
		@@ -5286,30 +5215,21 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
		if (ret)
		goto success;

		ret = ERR_PTR(-EBUSY);

		/*
		* Do not call slab_alloc_node(), since trylock mode isn't
		* compatible with slab_pre_alloc_hook/should_failslab and
		* kfence_alloc. Hence call __slab_alloc_node() (at most twice)
		* and slab_post_alloc_hook() directly.
		*
		* In !PREEMPT_RT ___slab_alloc() manipulates (freelist,tid) pair
		* in irq saved region. It assumes that the same cpu will not
		* __update_cpu_freelist_fast() into the same (freelist,tid) pair.
		* Therefore use in_nmi() to check whether particular bucket is in
		* irq protected section.
		*
		* If in_nmi() && local_lock_is_locked(s->cpu_slab) then it means that
		* this cpu was interrupted somewhere inside ___slab_alloc() after
		* it did local_lock_irqsave(&s->cpu_slab->lock, flags).
		* In this case fast path with __update_cpu_freelist_fast() is not safe.
		*/
		if (!in_nmi() \|\| !local_lock_is_locked(&s->cpu_slab->lock))
		ret = __slab_alloc_node(s, alloc_gfp, node, _RET_IP_, size);

		if (PTR_ERR(ret) == -EBUSY) {
		if (can_retry) {
		/*
		* It's possible we failed due to trylock as we preempted someone with
		* the sheaves locked, and the list_lock is also held by another cpu.
		* But it should be rare that multiple kmalloc buckets would have
		* sheaves locked, so try a larger one.
		*/
		if (!ret && can_retry) {
		/* pick the next kmalloc bucket */
		size = s->object_size + 1;
		/*
		@@ -5321,8 +5241,6 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
		can_retry = false;
		goto retry;
		}
		ret = NULL;
		}

		success:
		maybe_wipe_obj_freeptr(s, ret);
		@@ -7374,10 +7292,6 @@ void __kmem_cache_release(struct kmem_cache *s)
		{
		cache_random_seq_destroy(s);
		pcs_destroy(s);
		#ifdef CONFIG_PREEMPT_RT
		if (s->cpu_slab)
		lockdep_unregister_key(&s->lock_key);
		#endif
		free_percpu(s->cpu_slab);
		free_kmem_cache_nodes(s);
		}