slab: remove the do_slab_free() fastpath (bdc9282f) · Commits · git / linux-net

mm/slub.c

+13 −136

Original line number	Diff line number	Diff line
		@@ -3678,29 +3678,6 @@ static inline unsigned int init_tid(int cpu)
		return cpu;
		}

		static inline void note_cmpxchg_failure(const char *n,
		const struct kmem_cache *s, unsigned long tid)
		{
		#ifdef SLUB_DEBUG_CMPXCHG
		unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);

		pr_info("%s %s: cmpxchg redo ", n, s->name);

		if (IS_ENABLED(CONFIG_PREEMPTION) &&
		tid_to_cpu(tid) != tid_to_cpu(actual_tid)) {
		pr_warn("due to cpu change %d -> %d\n",
		tid_to_cpu(tid), tid_to_cpu(actual_tid));
		} else if (tid_to_event(tid) != tid_to_event(actual_tid)) {
		pr_warn("due to cpu running other code. Event %ld->%ld\n",
		tid_to_event(tid), tid_to_event(actual_tid));
		} else {
		pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
		actual_tid, tid, next_tid(tid));
		}
		#endif
		stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
		}

		static void init_kmem_cache_cpus(struct kmem_cache *s)
		{
		#ifdef CONFIG_PREEMPT_RT
		@@ -4239,18 +4216,6 @@ static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
		return true;
		}

		static inline bool
		__update_cpu_freelist_fast(struct kmem_cache *s,
		void freelist_old, void freelist_new,
		unsigned long tid)
		{
		struct freelist_tid old = { .freelist = freelist_old, .tid = tid };
		struct freelist_tid new = { .freelist = freelist_new, .tid = next_tid(tid) };

		return this_cpu_try_cmpxchg_freelist(s->cpu_slab->freelist_tid,
		&old.freelist_tid, new.freelist_tid);
		}

		/*
		* Get the slab's freelist and do not freeze it.
		*
		@@ -6188,99 +6153,6 @@ void defer_free_barrier(void)
		irq_work_sync(&per_cpu_ptr(&defer_free_objects, cpu)->work);
		}

		/*
		* Fastpath with forced inlining to produce a kfree and kmem_cache_free that
		* can perform fastpath freeing without additional function calls.
		*
		* The fastpath is only possible if we are freeing to the current cpu slab
		* of this processor. This typically the case if we have just allocated
		* the item before.
		*
		* If fastpath is not possible then fall back to __slab_free where we deal
		* with all sorts of special processing.
		*
		* Bulk free of a freelist with several objects (all pointing to the
		* same slab) possible by specifying head and tail ptr, plus objects
		* count (cnt). Bulk free indicated by tail pointer being set.
		*/
		static __always_inline void do_slab_free(struct kmem_cache *s,
		struct slab slab, void head, void *tail,
		int cnt, unsigned long addr)
		{
		/* cnt == 0 signals that it's called from kfree_nolock() */
		bool allow_spin = cnt;
		struct kmem_cache_cpu *c;
		unsigned long tid;
		void **freelist;

		redo:
		/*
		* Determine the currently cpus per cpu slab.
		* The cpu may change afterward. However that does not matter since
		* data is retrieved via this pointer. If we are on the same cpu
		* during the cmpxchg then the free will succeed.
		*/
		c = raw_cpu_ptr(s->cpu_slab);
		tid = READ_ONCE(c->tid);

		/* Same with comment on barrier() in __slab_alloc_node() */
		barrier();

		if (unlikely(slab != c->slab)) {
		if (unlikely(!allow_spin)) {
		/*
		* __slab_free() can locklessly cmpxchg16 into a slab,
		* but then it might need to take spin_lock
		* for further processing.
		* Avoid the complexity and simply add to a deferred list.
		*/
		defer_free(s, head);
		} else {
		__slab_free(s, slab, head, tail, cnt, addr);
		}
		return;
		}

		if (unlikely(!allow_spin)) {
		if ((in_nmi() \|\| !USE_LOCKLESS_FAST_PATH()) &&
		local_lock_is_locked(&s->cpu_slab->lock)) {
		defer_free(s, head);
		return;
		}
		cnt = 1; /* restore cnt. kfree_nolock() frees one object at a time */
		}

		if (USE_LOCKLESS_FAST_PATH()) {
		freelist = READ_ONCE(c->freelist);

		set_freepointer(s, tail, freelist);

		if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) {
		note_cmpxchg_failure("slab_free", s, tid);
		goto redo;
		}
		} else {
		__maybe_unused unsigned long flags = 0;

		/* Update the free list under the local lock */
		local_lock_cpu_slab(s, flags);
		c = this_cpu_ptr(s->cpu_slab);
		if (unlikely(slab != c->slab)) {
		local_unlock_cpu_slab(s, flags);
		goto redo;
		}
		tid = c->tid;
		freelist = c->freelist;

		set_freepointer(s, tail, freelist);
		c->freelist = head;
		c->tid = next_tid(tid);

		local_unlock_cpu_slab(s, flags);
		}
		stat_add(s, FREE_FASTPATH, cnt);
		}

		static __fastpath_inline
		void slab_free(struct kmem_cache s, struct slab slab, void *object,
		unsigned long addr)
		@@ -6297,7 +6169,7 @@ void slab_free(struct kmem_cache s, struct slab slab, void *object,
		return;
		}

		do_slab_free(s, slab, object, object, 1, addr);
		__slab_free(s, slab, object, object, 1, addr);
		}

		#ifdef CONFIG_MEMCG
		@@ -6306,7 +6178,7 @@ static noinline
		void memcg_alloc_abort_single(struct kmem_cache s, void object)
		{
		if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false)))
		do_slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_);
		__slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_);
		}
		#endif

		@@ -6321,7 +6193,7 @@ void slab_free_bulk(struct kmem_cache s, struct slab slab, void *head,
		* to remove objects, whose reuse must be delayed.
		*/
		if (likely(slab_free_freelist_hook(s, &head, &tail, &cnt)))
		do_slab_free(s, slab, head, tail, cnt, addr);
		__slab_free(s, slab, head, tail, cnt, addr);
		}

		#ifdef CONFIG_SLUB_RCU_DEBUG
		@@ -6347,14 +6219,14 @@ static void slab_free_after_rcu_debug(struct rcu_head *rcu_head)

		/* resume freeing */
		if (slab_free_hook(s, object, slab_want_init_on_free(s), true))
		do_slab_free(s, slab, object, object, 1, _THIS_IP_);
		__slab_free(s, slab, object, object, 1, _THIS_IP_);
		}
		#endif /* CONFIG_SLUB_RCU_DEBUG */

		#ifdef CONFIG_KASAN_GENERIC
		void ___cache_free(struct kmem_cache cache, void x, unsigned long addr)
		{
		do_slab_free(cache, virt_to_slab(x), x, x, 1, addr);
		__slab_free(cache, virt_to_slab(x), x, x, 1, addr);
		}
		#endif

		@@ -6570,7 +6442,12 @@ void kfree_nolock(const void *object)
		return;
		}

		do_slab_free(s, slab, x, x, 0, _RET_IP_);
		/*
		* __slab_free() can locklessly cmpxchg16 into a slab, but then it might
		* need to take spin_lock for further processing.
		* Avoid the complexity and simply add to a deferred list.
		*/
		defer_free(s, x);
		}
		EXPORT_SYMBOL_GPL(kfree_nolock);

		@@ -6996,7 +6873,7 @@ static void __kmem_cache_free_bulk(struct kmem_cache s, size_t size, void *p)
		if (kfence_free(df.freelist))
		continue;

		do_slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt,
		__slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt,
		_RET_IP_);
		} while (likely(size));
		}
		@@ -7082,7 +6959,7 @@ __refill_objects(struct kmem_cache s, void *p, gfp_t gfp, unsigned int min,
		cnt++;
		object = get_freepointer(s, object);
		} while (object);
		do_slab_free(s, slab, head, tail, cnt, _RET_IP_);
		__slab_free(s, slab, head, tail, cnt, _RET_IP_);
		}

		if (refilled >= max)