Commit 5ba6bc27 authored by Vlastimil Babka (SUSE)'s avatar Vlastimil Babka (SUSE)
Browse files

slab: decouple pointer to barn from kmem_cache_node

The pointer to barn currently exists in struct kmem_cache_node. That
struct is instantiated for every NUMA node with memory, but we want to
have a barn for every online node (including memoryless).

Thus decouple the two structures. In struct kmem_cache we have an array
for kmem_cache_node pointers that appears to be sized MAX_NUMNODES but
the actual size calculation in kmem_cache_init() uses nr_node_ids.
Therefore we can't just add another array of barn pointers. Instead
change the array to newly introduced struct kmem_cache_per_node_ptrs
holding both kmem_cache_node and barn pointer.

Adjust barn accessor and allocation/initialization code accordingly. For
now no functional change intended, barns are created 1:1 together with
kmem_cache_nodes.

Link: https://patch.msgid.link/20260311-b4-slab-memoryless-barns-v1-1-70ab850be4ce@kernel.org


Signed-off-by: default avatarVlastimil Babka (SUSE) <vbabka@kernel.org>
Reviewed-by: default avatarHarry Yoo <harry.yoo@oracle.com>
Reviewed-by: default avatarHao Li <hao.li@linux.dev>
parent 69d73421
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -191,6 +191,11 @@ struct kmem_cache_order_objects {
	unsigned int x;
};

struct kmem_cache_per_node_ptrs {
	struct node_barn *barn;
	struct kmem_cache_node *node;
};

/*
 * Slab cache management.
 */
@@ -247,7 +252,7 @@ struct kmem_cache {
	struct kmem_cache_stats __percpu *cpu_stats;
#endif

	struct kmem_cache_node *node[MAX_NUMNODES];
	struct kmem_cache_per_node_ptrs per_node[MAX_NUMNODES];
};

/*
+72 −56
Original line number Diff line number Diff line
@@ -59,7 +59,7 @@
 *   0.  cpu_hotplug_lock
 *   1.  slab_mutex (Global Mutex)
 *   2a. kmem_cache->cpu_sheaves->lock (Local trylock)
 *   2b. node->barn->lock (Spinlock)
 *   2b. barn->lock (Spinlock)
 *   2c. node->list_lock (Spinlock)
 *   3.  slab_lock(slab) (Only on some arches)
 *   4.  object_map_lock (Only for debugging)
@@ -136,7 +136,7 @@
 *   or spare sheaf can handle the allocation or free, there is no other
 *   overhead.
 *
 *   node->barn->lock (spinlock)
 *   barn->lock (spinlock)
 *
 *   This lock protects the operations on per-NUMA-node barn. It can quickly
 *   serve an empty or full sheaf if available, and avoid more expensive refill
@@ -436,26 +436,24 @@ struct kmem_cache_node {
	atomic_long_t total_objects;
	struct list_head full;
#endif
	struct node_barn *barn;
};

static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
	return s->node[node];
	return s->per_node[node].node;
}

static inline struct node_barn *get_barn_node(struct kmem_cache *s, int node)
{
	return s->per_node[node].barn;
}

/*
 * Get the barn of the current cpu's closest memory node. It may not exist on
 * systems with memoryless nodes but without CONFIG_HAVE_MEMORYLESS_NODES
 * Get the barn of the current cpu's NUMA node. It may be a memoryless node.
 */
static inline struct node_barn *get_barn(struct kmem_cache *s)
{
	struct kmem_cache_node *n = get_node(s, numa_mem_id());

	if (!n)
		return NULL;

	return n->barn;
	return get_barn_node(s, numa_mem_id());
}

/*
@@ -5771,7 +5769,6 @@ bool free_to_pcs(struct kmem_cache *s, void *object, bool allow_spin)

static void rcu_free_sheaf(struct rcu_head *head)
{
	struct kmem_cache_node *n;
	struct slab_sheaf *sheaf;
	struct node_barn *barn = NULL;
	struct kmem_cache *s;
@@ -5794,12 +5791,10 @@ static void rcu_free_sheaf(struct rcu_head *head)
	if (__rcu_free_sheaf_prepare(s, sheaf))
		goto flush;

	n = get_node(s, sheaf->node);
	if (!n)
	barn = get_barn_node(s, sheaf->node);
	if (!barn)
		goto flush;

	barn = n->barn;

	/* due to slab_free_hook() */
	if (unlikely(sheaf->size == 0))
		goto empty;
@@ -7410,7 +7405,7 @@ static inline int calculate_order(unsigned int size)
}

static void
init_kmem_cache_node(struct kmem_cache_node *n, struct node_barn *barn)
init_kmem_cache_node(struct kmem_cache_node *n)
{
	n->nr_partial = 0;
	spin_lock_init(&n->list_lock);
@@ -7420,9 +7415,6 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct node_barn *barn)
	atomic_long_set(&n->total_objects, 0);
	INIT_LIST_HEAD(&n->full);
#endif
	n->barn = barn;
	if (barn)
		barn_init(barn);
}

#ifdef CONFIG_SLUB_STATS
@@ -7517,8 +7509,8 @@ static void early_kmem_cache_node_alloc(int node)
	n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
	slab->freelist = get_freepointer(kmem_cache_node, n);
	slab->inuse = 1;
	kmem_cache_node->node[node] = n;
	init_kmem_cache_node(n, NULL);
	kmem_cache_node->per_node[node].node = n;
	init_kmem_cache_node(n);
	inc_slabs_node(kmem_cache_node, node, slab->objects);

	/*
@@ -7533,15 +7525,20 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
	int node;
	struct kmem_cache_node *n;

	for_each_kmem_cache_node(s, node, n) {
		if (n->barn) {
			WARN_ON(n->barn->nr_full);
			WARN_ON(n->barn->nr_empty);
			kfree(n->barn);
			n->barn = NULL;
	for_each_node(node) {
		struct node_barn *barn = get_barn_node(s, node);

		if (!barn)
			continue;

		WARN_ON(barn->nr_full);
		WARN_ON(barn->nr_empty);
		kfree(barn);
		s->per_node[node].barn = NULL;
	}

		s->node[node] = NULL;
	for_each_kmem_cache_node(s, node, n) {
		s->per_node[node].node = NULL;
		kmem_cache_free(kmem_cache_node, n);
	}
}
@@ -7562,31 +7559,36 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)

	for_each_node_mask(node, slab_nodes) {
		struct kmem_cache_node *n;
		struct node_barn *barn = NULL;

		if (slab_state == DOWN) {
			early_kmem_cache_node_alloc(node);
			continue;
		}

		if (cache_has_sheaves(s)) {
			barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);

			if (!barn)
				return 0;
		}

		n = kmem_cache_alloc_node(kmem_cache_node,
						GFP_KERNEL, node);
		if (!n) {
			kfree(barn);
		if (!n)
			return 0;

		init_kmem_cache_node(n);
		s->per_node[node].node = n;
	}

		init_kmem_cache_node(n, barn);
	if (slab_state == DOWN || !cache_has_sheaves(s))
		return 1;

	for_each_node_mask(node, slab_nodes) {
		struct node_barn *barn;

		barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);

		s->node[node] = n;
		if (!barn)
			return 0;

		barn_init(barn);
		s->per_node[node].barn = barn;
	}

	return 1;
}

@@ -7875,10 +7877,15 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
	if (cache_has_sheaves(s))
		rcu_barrier();

	for_each_node(node) {
		struct node_barn *barn = get_barn_node(s, node);

		if (barn)
			barn_shrink(s, barn);
	}

	/* Attempt to free all objects */
	for_each_kmem_cache_node(s, node, n) {
		if (n->barn)
			barn_shrink(s, n->barn);
		free_partial(s, n);
		if (n->nr_partial || node_nr_slabs(n))
			return 1;
@@ -8088,14 +8095,18 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
	unsigned long flags;
	int ret = 0;

	for_each_node(node) {
		struct node_barn *barn = get_barn_node(s, node);

		if (barn)
			barn_shrink(s, barn);
	}

	for_each_kmem_cache_node(s, node, n) {
		INIT_LIST_HEAD(&discard);
		for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
			INIT_LIST_HEAD(promote + i);

		if (n->barn)
			barn_shrink(s, n->barn);

		spin_lock_irqsave(&n->list_lock, flags);

		/*
@@ -8184,7 +8195,8 @@ static int slab_mem_going_online_callback(int nid)
		if (get_node(s, nid))
			continue;

		if (cache_has_sheaves(s)) {
		if (cache_has_sheaves(s) && !get_barn_node(s, nid)) {

			barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, nid);

			if (!barn) {
@@ -8205,13 +8217,17 @@ static int slab_mem_going_online_callback(int nid)
			goto out;
		}

		init_kmem_cache_node(n, barn);
		init_kmem_cache_node(n);
		s->per_node[nid].node = n;

		s->node[nid] = n;
		if (barn) {
			barn_init(barn);
			s->per_node[nid].barn = barn;
		}
	}
	/*
	 * Any cache created after this point will also have kmem_cache_node
	 * initialized for the new node.
	 * and barn initialized for the new node.
	 */
	node_set(nid, slab_nodes);
out:
@@ -8303,7 +8319,7 @@ static void __init bootstrap_cache_sheaves(struct kmem_cache *s)
		}

		barn_init(barn);
		get_node(s, node)->barn = barn;
		s->per_node[node].barn = barn;
	}

	for_each_possible_cpu(cpu) {
@@ -8374,8 +8390,8 @@ void __init kmem_cache_init(void)
	slab_state = PARTIAL;

	create_boot_cache(kmem_cache, "kmem_cache",
			offsetof(struct kmem_cache, node) +
				nr_node_ids * sizeof(struct kmem_cache_node *),
			offsetof(struct kmem_cache, per_node) +
				nr_node_ids * sizeof(struct kmem_cache_per_node_ptrs),
			SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0);

	kmem_cache = bootstrap(&boot_kmem_cache);