Commit 97029699 authored by Linus Torvalds
Browse files
Pull more slab updates from Vlastimil Babka:

 - Two stable fixes for kmalloc_nolock() usage from NMI context (Harry
   Yoo)

 - Allow kmalloc_nolock() allocations to be freed with kfree() and thus
   also kfree_rcu() and simplify slabobj_ext handling - we no longer
   need to track how it was allocated to use the matching freeing
   function (Harry Yoo)

* tag 'slab-for-7.0-part2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  mm/slab: drop the OBJEXTS_NOSPIN_ALLOC flag from enum objext_flags
  mm/slab: allow freeing kmalloc_nolock()'d objects using kfree[_rcu]()
  mm/slab: use prandom if !allow_spin
  mm/slab: do not access current->mems_allowed_seq if !allow_spin
parents 45a43ac5 27125df9
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -359,8 +359,7 @@ enum objext_flags {
	 * MEMCG_DATA_OBJEXTS.
	 */
	OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL,
	/* slabobj_ext vector allocated with kmalloc_nolock() */
	OBJEXTS_NOSPIN_ALLOC = __FIRST_OBJEXT_FLAG,
	__OBJEXTS_FLAG_UNUSED = __FIRST_OBJEXT_FLAG,
	/* the next bit after the last actual flag */
	__NR_OBJEXTS_FLAGS  = (__FIRST_OBJEXT_FLAG << 1),
};
+2 −2
Original line number Diff line number Diff line
@@ -1074,8 +1074,8 @@ static inline void rcu_read_unlock_migrate(void)
 * either fall back to use of call_rcu() or rearrange the structure to
 * position the rcu_head structure into the first 4096 bytes.
 *
 * The object to be freed can be allocated either by kmalloc() or
 * kmem_cache_alloc().
 * The object to be freed can be allocated either by kmalloc(),
 * kmalloc_nolock(), or kmem_cache_alloc().
 *
 * Note that the allowable offset might decrease in the future.
 *
+10 −12
Original line number Diff line number Diff line
@@ -837,13 +837,12 @@ static void delete_object_full(unsigned long ptr, unsigned int objflags)
	struct kmemleak_object *object;

	object = find_and_remove_object(ptr, 0, objflags);
	if (!object) {
#ifdef DEBUG
		kmemleak_warn("Freeing unknown object at 0x%08lx\n",
			      ptr);
#endif
	if (!object)
		/*
		 * kmalloc_nolock() -> kfree() calls kmemleak_free()
		 * without kmemleak_alloc().
		 */
		return;
	}
	__delete_object(object);
}

@@ -926,13 +925,12 @@ static void paint_ptr(unsigned long ptr, int color, unsigned int objflags)
	struct kmemleak_object *object;

	object = __find_and_get_object(ptr, 0, objflags);
	if (!object) {
		kmemleak_warn("Trying to color unknown object at 0x%08lx as %s\n",
			      ptr,
			      (color == KMEMLEAK_GREY) ? "Grey" :
			      (color == KMEMLEAK_BLACK) ? "Black" : "Unknown");
	if (!object)
		/*
		 * kmalloc_nolock() -> kfree_rcu() calls kmemleak_ignore()
		 * without kmemleak_alloc().
		 */
		return;
	}
	paint_it(object, color);
	put_object(object);
}
+63 −17
Original line number Diff line number Diff line
@@ -43,6 +43,7 @@
#include <linux/prefetch.h>
#include <linux/memcontrol.h>
#include <linux/random.h>
#include <linux/prandom.h>
#include <kunit/test.h>
#include <kunit/test-bug.h>
#include <linux/sort.h>
@@ -2189,8 +2190,6 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
			virt_to_slab(vec)->slab_cache == s);

	new_exts = (unsigned long)vec;
	if (unlikely(!allow_spin))
		new_exts |= OBJEXTS_NOSPIN_ALLOC;
#ifdef CONFIG_MEMCG
	new_exts |= MEMCG_DATA_OBJEXTS;
#endif
@@ -2228,7 +2227,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
	return 0;
}

static inline void free_slab_obj_exts(struct slab *slab)
static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin)
{
	struct slabobj_ext *obj_exts;

@@ -2256,10 +2255,10 @@ static inline void free_slab_obj_exts(struct slab *slab)
	 * the extension for obj_exts is expected to be NULL.
	 */
	mark_objexts_empty(obj_exts);
	if (unlikely(READ_ONCE(slab->obj_exts) & OBJEXTS_NOSPIN_ALLOC))
		kfree_nolock(obj_exts);
	else
	if (allow_spin)
		kfree(obj_exts);
	else
		kfree_nolock(obj_exts);
	slab->obj_exts = 0;
}

@@ -2323,7 +2322,7 @@ static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
	return 0;
}

static inline void free_slab_obj_exts(struct slab *slab)
static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin)
{
}

@@ -2584,6 +2583,24 @@ struct rcu_delayed_free {
 * Returns true if freeing of the object can proceed, false if its reuse
 * was delayed by CONFIG_SLUB_RCU_DEBUG or KASAN quarantine, or it was returned
 * to KFENCE.
 *
 * For objects allocated via kmalloc_nolock(), only a subset of alloc hooks
 * are invoked, so some free hooks must handle asymmetric hook calls.
 *
 * Alloc hooks called for kmalloc_nolock():
 * - kmsan_slab_alloc()
 * - kasan_slab_alloc()
 * - memcg_slab_post_alloc_hook()
 * - alloc_tagging_slab_alloc_hook()
 *
 * Free hooks that must handle missing corresponding alloc hooks:
 * - kmemleak_free_recursive()
 * - kfence_free()
 *
 * Free hooks that have no alloc hook counterpart, and are thus safe to call:
 * - debug_check_no_locks_freed()
 * - debug_check_no_obj_freed()
 * - __kcsan_check_access()
 */
static __always_inline
bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
@@ -3311,8 +3328,11 @@ static void *next_freelist_entry(struct kmem_cache *s,
	return (char *)start + idx;
}

static DEFINE_PER_CPU(struct rnd_state, slab_rnd_state);

/* Shuffle the single linked freelist based on a random pre-computed sequence */
static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
			     bool allow_spin)
{
	void *start;
	void *cur;
@@ -3323,7 +3343,19 @@ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
		return false;

	freelist_count = oo_objects(s->oo);
	if (allow_spin) {
		pos = get_random_u32_below(freelist_count);
	} else {
		struct rnd_state *state;

		/*
		 * An interrupt or NMI handler might interrupt and change
		 * the state in the middle, but that's safe.
		 */
		state = &get_cpu_var(slab_rnd_state);
		pos = prandom_u32_state(state) % freelist_count;
		put_cpu_var(slab_rnd_state);
	}

	page_limit = slab->objects * s->size;
	start = fixup_red_left(s, slab_address(slab));
@@ -3350,7 +3382,8 @@ static inline int init_cache_random_seq(struct kmem_cache *s)
	return 0;
}
static inline void init_freelist_randomization(void) { }
static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
				    bool allow_spin)
{
	return false;
}
@@ -3369,14 +3402,14 @@ static __always_inline void account_slab(struct slab *slab, int order,
}

static __always_inline void unaccount_slab(struct slab *slab, int order,
					   struct kmem_cache *s)
					   struct kmem_cache *s, bool allow_spin)
{
	/*
	 * The slab object extensions should now be freed regardless of
	 * whether mem_alloc_profiling_enabled() or not because profiling
	 * might have been disabled after slab->obj_exts got allocated.
	 */
	free_slab_obj_exts(slab);
	free_slab_obj_exts(slab, allow_spin);

	mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
			    -(PAGE_SIZE << order));
@@ -3441,7 +3474,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
	alloc_slab_obj_exts_early(s, slab);
	account_slab(slab, oo_order(oo), s, flags);

	shuffle = shuffle_freelist(s, slab);
	shuffle = shuffle_freelist(s, slab, allow_spin);

	if (!shuffle) {
		start = fixup_red_left(s, start);
@@ -3480,7 +3513,7 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab, bool allow_spin
	page->mapping = NULL;
	__ClearPageSlab(page);
	mm_account_reclaimed_pages(pages);
	unaccount_slab(slab, order, s);
	unaccount_slab(slab, order, s, allow_spin);
	if (allow_spin)
		free_frozen_pages(page, order);
	else
@@ -3791,6 +3824,7 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
	struct zone *zone;
	enum zone_type highest_zoneidx = gfp_zone(pc->flags);
	unsigned int cpuset_mems_cookie;
	bool allow_spin = gfpflags_allow_spinning(pc->flags);

	/*
	 * The defrag ratio allows a configuration of the tradeoffs between
@@ -3815,7 +3849,15 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
		return NULL;

	do {
		/*
		 * read_mems_allowed_begin() accesses current->mems_allowed_seq,
		 * a seqcount_spinlock_t that is not NMI-safe. Do not access
		 * current->mems_allowed_seq and avoid retry when GFP flags
		 * indicate spinning is not allowed.
		 */
		if (allow_spin)
			cpuset_mems_cookie = read_mems_allowed_begin();

		zonelist = node_zonelist(mempolicy_slab_node(), pc->flags);
		for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
			struct kmem_cache_node *n;
@@ -3839,7 +3881,7 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
				}
			}
		}
	} while (read_mems_allowed_retry(cpuset_mems_cookie));
	} while (allow_spin && read_mems_allowed_retry(cpuset_mems_cookie));
#endif	/* CONFIG_NUMA */
	return NULL;
}
@@ -6372,7 +6414,7 @@ void kvfree_rcu_cb(struct rcu_head *head)

/**
 * kfree - free previously allocated memory
 * @object: pointer returned by kmalloc() or kmem_cache_alloc()
 * @object: pointer returned by kmalloc(), kmalloc_nolock(), or kmem_cache_alloc()
 *
 * If @object is NULL, no operation is performed.
 */
@@ -6391,6 +6433,7 @@ void kfree(const void *object)
	page = virt_to_page(object);
	slab = page_slab(page);
	if (!slab) {
		/* kmalloc_nolock() doesn't support large kmalloc */
		free_large_kmalloc(page, (void *)object);
		return;
	}
@@ -8337,6 +8380,9 @@ void __init kmem_cache_init_late(void)
	flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM | WQ_PERCPU,
				  0);
	WARN_ON(!flushwq);
#ifdef CONFIG_SLAB_FREELIST_RANDOM
	prandom_init_once(&slab_rnd_state);
#endif
}

int do_kmem_cache_create(struct kmem_cache *s, const char *name,