Commit 32c894c7 authored by Vlastimil Babka
Browse files

slab: remove struct kmem_cache_cpu



The cpu slab is not used anymore for allocation or freeing, the
remaining code is for flushing, but it's effectively dead.  Remove the
whole struct kmem_cache_cpu, the flushing code and other orphaned
functions.

The remaining used field of kmem_cache_cpu is the stat array with
CONFIG_SLUB_STATS. Put it instead in a new struct kmem_cache_stats.
In struct kmem_cache, the field is cpu_stats and placed near the
end of the struct.

Reviewed-by: Hao Li <hao.li@linux.dev>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
parent 073d5f15
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -21,14 +21,12 @@
# define system_has_freelist_aba()	system_has_cmpxchg128()
# define try_cmpxchg_freelist		try_cmpxchg128
# endif
#define this_cpu_try_cmpxchg_freelist	this_cpu_try_cmpxchg128
typedef u128 freelist_full_t;
#else /* CONFIG_64BIT */
# ifdef system_has_cmpxchg64
# define system_has_freelist_aba()	system_has_cmpxchg64()
# define try_cmpxchg_freelist		try_cmpxchg64
# endif
#define this_cpu_try_cmpxchg_freelist	this_cpu_try_cmpxchg64
typedef u64 freelist_full_t;
#endif /* CONFIG_64BIT */

@@ -189,7 +187,6 @@ struct kmem_cache_order_objects {
 * Slab cache management.
 */
struct kmem_cache {
	struct kmem_cache_cpu __percpu *cpu_slab;
	struct slub_percpu_sheaves __percpu *cpu_sheaves;
	/* Used for retrieving partial slabs, etc. */
	slab_flags_t flags;
@@ -238,6 +235,10 @@ struct kmem_cache {
	unsigned int usersize;		/* Usercopy region size */
#endif

#ifdef CONFIG_SLUB_STATS
	struct kmem_cache_stats __percpu *cpu_stats;
#endif

	struct kmem_cache_node *node[MAX_NUMNODES];
};

+23 −281
Original line number Diff line number Diff line
@@ -405,28 +405,11 @@ enum stat_item {
	NR_SLUB_STAT_ITEMS
};

/*
 * A freelist head pointer paired with a transaction id.
 *
 * The anonymous union overlays the two-member struct with a single
 * freelist_full_t value, so the pair can also be accessed as one unit
 * through the freelist_tid member.
 */
struct freelist_tid {
	union {
		struct {
			void *freelist;		/* Pointer to next available object */
			unsigned long tid;	/* Globally unique transaction id */
		};
		/* Both fields above viewed as a single wide integer */
		freelist_full_t freelist_tid;
	};
};

/*
 * When changing the layout, make sure freelist and tid are still compatible
 * with this_cpu_cmpxchg_double() alignment requirements.
 */
struct kmem_cache_cpu {
	struct freelist_tid;
	struct slab *slab;	/* The slab from which we are allocating */
	local_trylock_t lock;	/* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
struct kmem_cache_stats {
	unsigned int stat[NR_SLUB_STAT_ITEMS];
#endif
};
#endif

static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
@@ -435,7 +418,7 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
	 * The rmw is racy on a preemptible kernel but this is acceptable, so
	 * avoid this_cpu_add()'s irq-disable overhead.
	 */
	raw_cpu_inc(s->cpu_slab->stat[si]);
	raw_cpu_inc(s->cpu_stats->stat[si]);
#endif
}

@@ -443,7 +426,7 @@ static inline
void stat_add(const struct kmem_cache *s, enum stat_item si, int v)
{
#ifdef CONFIG_SLUB_STATS
	raw_cpu_add(s->cpu_slab->stat[si], v);
	raw_cpu_add(s->cpu_stats->stat[si], v);
#endif
}

@@ -532,7 +515,7 @@ static inline struct node_barn *get_barn(struct kmem_cache *s)
static nodemask_t slab_nodes;

/*
 * Workqueue used for flush_cpu_slab().
 * Workqueue used for flushing cpu and kfree_rcu sheaves.
 */
static struct workqueue_struct *flushwq;

@@ -1154,20 +1137,6 @@ static void object_err(struct kmem_cache *s, struct slab *slab,
	WARN_ON(1);
}

/*
 * Check whether @nextfree is a valid pointer for @slab while walking a
 * freelist.
 *
 * Only acts when SLAB_CONSISTENCY_CHECKS is set for the cache and @freelist
 * is non-NULL.  On a bad pointer the problem is reported against the object
 * at *@freelist and *@freelist is reset to NULL so that the caller stops
 * using the rest of the chain.
 *
 * Returns true if corruption was detected and handled, false otherwise.
 */
static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
			       void **freelist, void *nextfree)
{
	/* Consistency checking is not enabled for this cache. */
	if (!(s->flags & SLAB_CONSISTENCY_CHECKS))
		return false;

	/* The next pointer looks sane, nothing to do. */
	if (check_valid_pointer(s, slab, nextfree))
		return false;

	/* No iterator to cut off, so nothing can be isolated. */
	if (!freelist)
		return false;

	object_err(s, slab, *freelist, "Freechain corrupt");
	*freelist = NULL;
	slab_fix(s, "Isolate corrupted freechain");

	return true;
}

static void __slab_err(struct slab *slab)
{
	if (slab_in_kunit_test())
@@ -1949,11 +1918,6 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
							int objects) {}
static inline void dec_slabs_node(struct kmem_cache *s, int node,
							int objects) {}
/*
 * Stub variant of freelist_corrupted(): performs no checking and always
 * reports the freelist as not corrupted.
 * NOTE(review): appears to be the !CONFIG_SLUB_DEBUG counterpart, judging by
 * the #endif that follows it - confirm.
 */
static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
			       void **freelist, void *nextfree)
{
	return false;
}
#endif /* CONFIG_SLUB_DEBUG */

/*
@@ -3651,191 +3615,6 @@ static void *get_from_partial(struct kmem_cache *s, int node,
	return get_from_any_partial(s, pc);
}

/*
 * TID_STEP is the distance between two consecutive transaction ids that
 * belong to the same cpu.
 */
#ifndef CONFIG_PREEMPTION
/*
 * Without preemption there is no need to tell different cpus apart, so
 * transaction ids simply count up by one.
 */
#define TID_STEP 1
#else
/*
 * Transaction ids must be globally unique so they can disambiguate a
 * cmpxchg.  They start at the cpu number and then advance by
 * CONFIG_NR_CPUS rounded up to a power of two.
 */
#define TID_STEP  roundup_pow_of_two(CONFIG_NR_CPUS)
#endif /* CONFIG_PREEMPTION */

/* Return the transaction id that follows @tid. */
static inline unsigned long next_tid(unsigned long tid)
{
	unsigned long advanced = tid;

	advanced += TID_STEP;

	return advanced;
}

#ifdef SLUB_DEBUG_CMPXCHG
/* Extract the remainder of @tid modulo TID_STEP, i.e. its cpu part. */
static inline unsigned int tid_to_cpu(unsigned long tid)
{
	unsigned long cpu_part = tid % TID_STEP;

	return cpu_part;
}

/* Extract the number of whole TID_STEP increments contained in @tid. */
static inline unsigned long tid_to_event(unsigned long tid)
{
	unsigned long event_part = tid / TID_STEP;

	return event_part;
}
#endif

/* Return the first transaction id for @cpu, which is the cpu number itself. */
static inline unsigned int init_tid(int cpu)
{
	unsigned int first_tid = cpu;

	return first_tid;
}

/*
 * Initialize the per-cpu kmem_cache_cpu of every possible cpu for cache @s:
 * set up its local trylock and seed its transaction id from the cpu number.
 */
static void init_kmem_cache_cpus(struct kmem_cache *s)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct kmem_cache_cpu *pcpu = per_cpu_ptr(s->cpu_slab, cpu);

		local_trylock_init(&pcpu->lock);
		pcpu->tid = init_tid(cpu);
	}
}

/*
 * Finishes removing the cpu slab. Merges the cpu's freelist with the slab's
 * freelist, unfreezes the slab and puts it on the proper list.
 * Assumes the slab has been already safely taken away from kmem_cache_cpu
 * by the caller.
 *
 * @s:        cache the slab belongs to
 * @slab:     the former cpu slab being deactivated
 * @freelist: head of the cpu's freelist for @slab; if NULL, nothing is
 *            spliced and the slab is only unfrozen
 */
static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
			    void *freelist)
{
	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
	int free_delta = 0;
	void *nextfree, *freelist_iter, *freelist_tail;
	/* Used both as the add_partial() position and as the stat item below */
	int tail = DEACTIVATE_TO_HEAD;
	unsigned long flags = 0;
	struct freelist_counters old, new;

	/*
	 * The slab already has objects on its own freelist: account that as
	 * remote frees and queue the slab at the tail instead of the head.
	 */
	if (READ_ONCE(slab->freelist)) {
		stat(s, DEACTIVATE_REMOTE_FREES);
		tail = DEACTIVATE_TO_TAIL;
	}

	/*
	 * Stage one: Count the objects on cpu's freelist as free_delta and
	 * remember the last object in freelist_tail for later splicing.
	 */
	freelist_tail = NULL;
	freelist_iter = freelist;
	while (freelist_iter) {
		nextfree = get_freepointer(s, freelist_iter);

		/*
		 * If 'nextfree' is invalid, it is possible that the object at
		 * 'freelist_iter' is already corrupted.  So isolate all objects
		 * starting at 'freelist_iter' by skipping them.
		 */
		if (freelist_corrupted(s, slab, &freelist_iter, nextfree))
			break;

		freelist_tail = freelist_iter;
		free_delta++;

		freelist_iter = nextfree;
	}

	/*
	 * Stage two: Unfreeze the slab while splicing the per-cpu
	 * freelist to the head of slab's freelist.
	 *
	 * The snapshot and the new state are rebuilt on every iteration and
	 * the loop repeats until slab_update_freelist() reports success.
	 */
	do {
		old.freelist = READ_ONCE(slab->freelist);
		old.counters = READ_ONCE(slab->counters);
		VM_BUG_ON(!old.frozen);

		/* Determine target state of the slab */
		/*
		 * NOTE(review): frozen and inuse are presumably fields that
		 * overlay counters in struct freelist_counters - confirm.
		 */
		new.counters = old.counters;
		new.frozen = 0;
		if (freelist_tail) {
			/* Objects from the cpu freelist become free again */
			new.inuse -= free_delta;
			/* Chain the slab's current freelist behind the cpu one */
			set_freepointer(s, freelist_tail, old.freelist);
			new.freelist = freelist;
		} else {
			new.freelist = old.freelist;
		}
	} while (!slab_update_freelist(s, slab, &old, &new, "unfreezing slab"));

	/*
	 * Stage three: Manipulate the slab list based on the updated state.
	 */
	if (!new.inuse && n->nr_partial >= s->min_partial) {
		/* Empty slab and the node already has enough partial slabs */
		stat(s, DEACTIVATE_EMPTY);
		discard_slab(s, slab);
		stat(s, FREE_SLAB);
	} else if (new.freelist) {
		/* Slab has free objects: put it on the node's partial list */
		spin_lock_irqsave(&n->list_lock, flags);
		add_partial(n, slab, tail);
		spin_unlock_irqrestore(&n->list_lock, flags);
		stat(s, tail);
	} else {
		/* No free objects left: the slab is not put on any list here */
		stat(s, DEACTIVATE_FULL);
	}
}

/*
 * Detach the slab and freelist held in @c and deactivate the slab.
 *
 * The detach and the tid bump happen with the cpu_slab local lock held and
 * interrupts saved; the deactivation itself runs after the lock has been
 * released.  Nothing more is done if @c held no slab.
 */
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
	struct slab *detached;
	void *objects;
	unsigned long irq_flags;

	local_lock_irqsave(&s->cpu_slab->lock, irq_flags);

	/* Take a private copy of the cpu slab state ... */
	detached = c->slab;
	objects = c->freelist;

	/* ... and leave the cpu slab empty with a fresh transaction id. */
	c->slab = NULL;
	c->freelist = NULL;
	c->tid = next_tid(c->tid);

	local_unlock_irqrestore(&s->cpu_slab->lock, irq_flags);

	if (!detached)
		return;

	deactivate_slab(s, detached, objects);
	stat(s, CPUSLAB_FLUSH);
}

/*
 * Detach and deactivate the cpu slab that cache @s holds for @cpu.
 *
 * Unlike flush_slab() this takes no lock itself.
 * NOTE(review): presumably the caller guarantees that @cpu cannot touch its
 * cpu slab concurrently - confirm against the call sites.
 */
static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
{
	struct kmem_cache_cpu *pcpu = per_cpu_ptr(s->cpu_slab, cpu);
	struct slab *detached;
	void *objects;

	objects = pcpu->freelist;
	detached = pcpu->slab;

	/* Leave the cpu slab empty with a fresh transaction id. */
	pcpu->slab = NULL;
	pcpu->freelist = NULL;
	pcpu->tid = next_tid(pcpu->tid);

	if (!detached)
		return;

	deactivate_slab(s, detached, objects);
	stat(s, CPUSLAB_FLUSH);
}

/* Flush the current cpu's slab of cache @s, if it has one. */
static inline void flush_this_cpu_slab(struct kmem_cache *s)
{
	struct kmem_cache_cpu *this_cpu = this_cpu_ptr(s->cpu_slab);

	/* No cpu slab attached, nothing to flush. */
	if (!this_cpu->slab)
		return;

	flush_slab(s, this_cpu);
}

static bool has_cpu_slab(int cpu, struct kmem_cache *s)
{
	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);

	return c->slab;
}

static bool has_pcs_used(int cpu, struct kmem_cache *s)
{
	struct slub_percpu_sheaves *pcs;
@@ -3849,11 +3628,11 @@ static bool has_pcs_used(int cpu, struct kmem_cache *s)
}

/*
 * Flush cpu slab.
 * Flush percpu sheaves
 *
 * Called from CPU work handler with migration disabled.
 */
static void flush_cpu_slab(struct work_struct *w)
static void flush_cpu_sheaves(struct work_struct *w)
{
	struct kmem_cache *s;
	struct slub_flush_work *sfw;
@@ -3864,8 +3643,6 @@ static void flush_cpu_slab(struct work_struct *w)

	if (cache_has_sheaves(s))
		pcs_flush_all(s);

	flush_this_cpu_slab(s);
}

static void flush_all_cpus_locked(struct kmem_cache *s)
@@ -3878,11 +3655,11 @@ static void flush_all_cpus_locked(struct kmem_cache *s)

	for_each_online_cpu(cpu) {
		sfw = &per_cpu(slub_flush, cpu);
		if (!has_cpu_slab(cpu, s) && !has_pcs_used(cpu, s)) {
		if (!has_pcs_used(cpu, s)) {
			sfw->skip = true;
			continue;
		}
		INIT_WORK(&sfw->work, flush_cpu_slab);
		INIT_WORK(&sfw->work, flush_cpu_sheaves);
		sfw->skip = false;
		sfw->s = s;
		queue_work_on(cpu, flushwq, &sfw->work);
@@ -3988,7 +3765,6 @@ static int slub_cpu_dead(unsigned int cpu)

	mutex_lock(&slab_mutex);
	list_for_each_entry(s, &slab_caches, list) {
		__flush_cpu_slab(s, cpu);
		if (cache_has_sheaves(s))
			__pcs_flush_all_cpu(s, cpu);
	}
@@ -7162,26 +6938,21 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct node_barn *barn)
		barn_init(barn);
}

static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
#ifdef CONFIG_SLUB_STATS
static inline int alloc_kmem_cache_stats(struct kmem_cache *s)
{
	BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
			NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH *
			sizeof(struct kmem_cache_cpu));
			sizeof(struct kmem_cache_stats));

	/*
	 * Must align to double word boundary for the double cmpxchg
	 * instructions to work; see __pcpu_double_call_return_bool().
	 */
	s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
				     2 * sizeof(void *));
	s->cpu_stats = alloc_percpu(struct kmem_cache_stats);

	if (!s->cpu_slab)
	if (!s->cpu_stats)
		return 0;

	init_kmem_cache_cpus(s);

	return 1;
}
#endif

static int init_percpu_sheaves(struct kmem_cache *s)
{
@@ -7292,7 +7063,9 @@ void __kmem_cache_release(struct kmem_cache *s)
{
	cache_random_seq_destroy(s);
	pcs_destroy(s);
	free_percpu(s->cpu_slab);
#ifdef CONFIG_SLUB_STATS
	free_percpu(s->cpu_stats);
#endif
	free_kmem_cache_nodes(s);
}

@@ -7989,12 +7762,6 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)

	memcpy(s, static_cache, kmem_cache->object_size);

	/*
	 * This runs very early, and only the boot processor is supposed to be
	 * up.  Even if it weren't true, IRQs are not up so we couldn't fire
	 * IPIs around.
	 */
	__flush_cpu_slab(s, smp_processor_id());
	for_each_kmem_cache_node(s, node, n) {
		struct slab *p;

@@ -8209,8 +7976,10 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
	if (!init_kmem_cache_nodes(s))
		goto out;

	if (!alloc_kmem_cache_cpus(s))
#ifdef CONFIG_SLUB_STATS
	if (!alloc_kmem_cache_stats(s))
		goto out;
#endif

	err = init_percpu_sheaves(s);
	if (err)
@@ -8529,33 +8298,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
	if (!nodes)
		return -ENOMEM;

	if (flags & SO_CPU) {
		int cpu;

		for_each_possible_cpu(cpu) {
			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
							       cpu);
			int node;
			struct slab *slab;

			slab = READ_ONCE(c->slab);
			if (!slab)
				continue;

			node = slab_nid(slab);
			if (flags & SO_TOTAL)
				x = slab->objects;
			else if (flags & SO_OBJECTS)
				x = slab->inuse;
			else
				x = 1;

			total += x;
			nodes[node] += x;

		}
	}

	/*
	 * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
	 * already held which will conflict with an existing lock order:
@@ -8926,7 +8668,7 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
		return -ENOMEM;

	for_each_online_cpu(cpu) {
		unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
		unsigned int x = per_cpu_ptr(s->cpu_stats, cpu)->stat[si];

		data[cpu] = x;
		sum += x;
@@ -8952,7 +8694,7 @@ static void clear_stat(struct kmem_cache *s, enum stat_item si)
	int cpu;

	for_each_online_cpu(cpu)
		per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
		per_cpu_ptr(s->cpu_stats, cpu)->stat[si] = 0;
}

#define STAT_ATTR(si, text) 					\