Commit 76d9b92e authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull slab updates from Vlastimil Babka:
 "The most prominent change this time is the kmem_buckets based
  hardening of kmalloc() allocations from Kees Cook.

  We have also extended the kmalloc() alignment guarantees for
  non-power-of-two sizes in a way that benefits rust.

  The rest are various cleanups and non-critical fixups.

   - Dedicated bucket allocator (Kees Cook)

     This series [1] enhances the probabilistic defense against heap
     spraying/grooming of CONFIG_RANDOM_KMALLOC_CACHES from last year.

     kmalloc() users that are known to be useful for exploits can get
     completely separate set of kmalloc caches that can't be shared with
     other users. The first converted users are alloc_msg() and
     memdup_user().

     The hardening is enabled by CONFIG_SLAB_BUCKETS.

   - Extended kmalloc() alignment guarantees (Vlastimil Babka)

     For years now we have guaranteed natural alignment for power-of-two
     allocations, but nothing was defined for other sizes (in practice,
     we have two such buckets, kmalloc-96 and kmalloc-192).

     To avoid unnecessary padding in the rust layer due to its alignment
     rules, extend the guarantee so that the alignment is at least the
     largest power-of-two divisor of the requested size.

     This fits what rust needs, is a superset of the existing
     power-of-two guarantee, and does not in practice change the layout
     (and thus does not add overhead due to padding) of the kmalloc-96
     and kmalloc-192 caches, unless slab debugging is enabled for them.

   - Cleanups and non-critical fixups (Chengming Zhou, Suren
     Baghdasaryan, Matthew Willcox, Alex Shi, and Vlastimil Babka)

     Various tweaks related to the new alloc profiling code, folio
     conversion, debugging and more leftovers after SLAB"

Link: https://lore.kernel.org/all/20240701190152.it.631-kees@kernel.org/ [1]

* tag 'slab-for-6.11' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  mm/memcg: alignment memcg_data define condition
  mm, slab: move prepare_slab_obj_exts_hook under CONFIG_MEM_ALLOC_PROFILING
  mm, slab: move allocation tagging code in the alloc path into a hook
  mm/util: Use dedicated slab buckets for memdup_user()
  ipc, msg: Use dedicated slab buckets for alloc_msg()
  mm/slab: Introduce kmem_buckets_create() and family
  mm/slab: Introduce kvmalloc_buckets_node() that can take kmem_buckets argument
  mm/slab: Plumb kmem_buckets into __do_kmalloc_node()
  mm/slab: Introduce kmem_buckets typedef
  slab, rust: extend kmalloc() alignment guarantees to remove Rust padding
  slab: delete useless RED_INACTIVE and RED_ACTIVE
  slab: don't put freepointer outside of object if only orig_size
  slab: make check_object() more consistent
  mm: Reduce the number of slab->folio casts
  mm, slab: don't wrap internal functions with alloc_hooks()
parents b2fc97c1 436381ea
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -145,7 +145,9 @@ smaller than page size.

The address of a chunk allocated with `kmalloc` is aligned to at least
ARCH_KMALLOC_MINALIGN bytes. For sizes which are a power of two, the
alignment is also guaranteed to be at least the respective size.
alignment is also guaranteed to be at least the respective size. For other
sizes, the alignment is guaranteed to be at least the largest power-of-two
divisor of the size.

Chunks allocated with kmalloc() can be resized with krealloc(). Similarly
to kmalloc_array(): a helper for resizing arrays is provided in the form of
+3 −3
Original line number Diff line number Diff line
@@ -1110,7 +1110,7 @@ static inline unsigned int compound_order(struct page *page)
 *
 * Return: The order of the folio.
 */
static inline unsigned int folio_order(struct folio *folio)
static inline unsigned int folio_order(const struct folio *folio)
{
	if (!folio_test_large(folio))
		return 0;
@@ -2150,7 +2150,7 @@ static inline struct folio *folio_next(struct folio *folio)
 * it from being split.  It is not necessary for the folio to be locked.
 * Return: The base-2 logarithm of the size of this folio.
 */
static inline unsigned int folio_shift(struct folio *folio)
static inline unsigned int folio_shift(const struct folio *folio)
{
	return PAGE_SHIFT + folio_order(folio);
}
@@ -2163,7 +2163,7 @@ static inline unsigned int folio_shift(struct folio *folio)
 * it from being split.  It is not necessary for the folio to be locked.
 * Return: The number of bytes in this folio.
 */
static inline size_t folio_size(struct folio *folio)
static inline size_t folio_size(const struct folio *folio)
{
	return PAGE_SIZE << folio_order(folio);
}
+7 −2
Original line number Diff line number Diff line
@@ -169,8 +169,10 @@ struct page {
	/* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */
	atomic_t _refcount;

#ifdef CONFIG_SLAB_OBJ_EXT
#ifdef CONFIG_MEMCG
	unsigned long memcg_data;
#elif defined(CONFIG_SLAB_OBJ_EXT)
	unsigned long _unused_slab_obj_exts;
#endif

	/*
@@ -298,6 +300,7 @@ typedef struct {
 * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h.
 * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head().
 * @_deferred_list: Folios to be split under memory pressure.
 * @_unused_slab_obj_exts: Placeholder to match obj_exts in struct slab.
 *
 * A folio is a physically, virtually and logically contiguous set
 * of bytes.  It is a power-of-two in size, and it is aligned to that
@@ -332,8 +335,10 @@ struct folio {
			};
			atomic_t _mapcount;
			atomic_t _refcount;
#ifdef CONFIG_SLAB_OBJ_EXT
#ifdef CONFIG_MEMCG
			unsigned long memcg_data;
#elif defined(CONFIG_SLAB_OBJ_EXT)
			unsigned long _unused_slab_obj_exts;
#endif
#if defined(WANT_PAGE_VIRTUAL)
			void *virtual;
+2 −5
Original line number Diff line number Diff line
@@ -38,11 +38,8 @@
 * Magic nums for obj red zoning.
 * Placed in the first word before and the first word after an obj.
 */
#define	RED_INACTIVE	0x09F911029D74E35BULL	/* when obj is inactive */
#define	RED_ACTIVE	0xD84156C5635688C0ULL	/* when obj is active */

#define SLUB_RED_INACTIVE	0xbb
#define SLUB_RED_ACTIVE		0xcc
#define SLUB_RED_INACTIVE	0xbb	/* when obj is inactive */
#define SLUB_RED_ACTIVE		0xcc	/* when obj is active */

/* ...and for poisoning */
#define	POISON_INUSE	0x5a	/* for use-uninitialised poisoning */
+65 −32
Original line number Diff line number Diff line
@@ -426,8 +426,9 @@ enum kmalloc_cache_type {
	NR_KMALLOC_TYPES
};

extern struct kmem_cache *
kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1];

extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];

/*
 * Define gfp bits that should not be set for KMALLOC_NORMAL.
@@ -528,9 +529,6 @@ static_assert(PAGE_SHIFT <= 20);

#include <linux/alloc_tag.h>

void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
#define __kmalloc(...)				alloc_hooks(__kmalloc_noprof(__VA_ARGS__))

/**
 * kmem_cache_alloc - Allocate an object
 * @cachep: The cache to allocate from.
@@ -551,6 +549,10 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,

void kmem_cache_free(struct kmem_cache *s, void *objp);

kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
				  unsigned int useroffset, unsigned int usersize,
				  void (*ctor)(void *));

/*
 * Bulk allocation and freeing operations. These are accelerated in an
 * allocator specific way to avoid taking locks repeatedly or building
@@ -568,31 +570,49 @@ static __always_inline void kfree_bulk(size_t size, void **p)
	kmem_cache_free_bulk(NULL, size, p);
}

void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
							 __alloc_size(1);
#define __kmalloc_node(...)			alloc_hooks(__kmalloc_node_noprof(__VA_ARGS__))

void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags,
				   int node) __assume_slab_alignment __malloc;
#define kmem_cache_alloc_node(...)	alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__))

void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
		    __assume_kmalloc_alignment __alloc_size(3);
/*
 * These macros allow declaring a kmem_buckets * parameter alongside size, which
 * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call
 * sites don't have to pass NULL.
 */
#ifdef CONFIG_SLAB_BUCKETS
#define DECL_BUCKET_PARAMS(_size, _b)	size_t (_size), kmem_buckets *(_b)
#define PASS_BUCKET_PARAMS(_size, _b)	(_size), (_b)
#define PASS_BUCKET_PARAM(_b)		(_b)
#else
#define DECL_BUCKET_PARAMS(_size, _b)	size_t (_size)
#define PASS_BUCKET_PARAMS(_size, _b)	(_size)
#define PASS_BUCKET_PARAM(_b)		NULL
#endif

void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
		int node, size_t size) __assume_kmalloc_alignment
						__alloc_size(4);
#define kmalloc_trace(...)			alloc_hooks(kmalloc_trace_noprof(__VA_ARGS__))
/*
 * The following functions are not to be used directly and are intended only
 * for internal use from kmalloc() and kmalloc_node()
 * with the exception of kunit tests
 */

#define kmalloc_node_trace(...)			alloc_hooks(kmalloc_node_trace_noprof(__VA_ARGS__))
void *__kmalloc_noprof(size_t size, gfp_t flags)
				__assume_kmalloc_alignment __alloc_size(1);

void *kmalloc_large_noprof(size_t size, gfp_t flags) __assume_page_alignment
					      __alloc_size(1);
#define kmalloc_large(...)			alloc_hooks(kmalloc_large_noprof(__VA_ARGS__))
void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
				__assume_kmalloc_alignment __alloc_size(1);

void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
				__assume_kmalloc_alignment __alloc_size(3);

void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_page_alignment
							     __alloc_size(1);
#define kmalloc_large_node(...)			alloc_hooks(kmalloc_large_node_noprof(__VA_ARGS__))
void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags,
				  int node, size_t size)
				__assume_kmalloc_alignment __alloc_size(4);

void *__kmalloc_large_noprof(size_t size, gfp_t flags)
				__assume_page_alignment __alloc_size(1);

void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
				__assume_page_alignment __alloc_size(1);

/**
 * kmalloc - allocate kernel memory
@@ -604,7 +624,8 @@ void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_pag
 *
 * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
 * bytes. For @size of power of two bytes, the alignment is also guaranteed
 * to be at least to the size.
 * to be at least to the size. For other sizes, the alignment is guaranteed to
 * be at least the largest power-of-two divisor of @size.
 *
 * The @flags argument may be one of the GFP flags defined at
 * include/linux/gfp_types.h and described at
@@ -654,10 +675,10 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
		unsigned int index;

		if (size > KMALLOC_MAX_CACHE_SIZE)
			return kmalloc_large_noprof(size, flags);
			return __kmalloc_large_noprof(size, flags);

		index = kmalloc_index(size);
		return kmalloc_trace_noprof(
		return __kmalloc_cache_noprof(
				kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
				flags, size);
	}
@@ -665,20 +686,26 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
}
#define kmalloc(...)				alloc_hooks(kmalloc_noprof(__VA_ARGS__))

#define kmem_buckets_alloc(_b, _size, _flags)	\
	alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))

#define kmem_buckets_alloc_track_caller(_b, _size, _flags)	\
	alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_))

static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node)
{
	if (__builtin_constant_p(size) && size) {
		unsigned int index;

		if (size > KMALLOC_MAX_CACHE_SIZE)
			return kmalloc_large_node_noprof(size, flags, node);
			return __kmalloc_large_node_noprof(size, flags, node);

		index = kmalloc_index(size);
		return kmalloc_node_trace_noprof(
		return __kmalloc_cache_node_noprof(
				kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
				flags, node, size);
	}
	return __kmalloc_node_noprof(size, flags, node);
	return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node);
}
#define kmalloc_node(...)			alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))

@@ -729,8 +756,10 @@ static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(voi
 */
#define kcalloc(n, size, flags)		kmalloc_array(n, size, (flags) | __GFP_ZERO)

void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node,
void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node,
					 unsigned long caller) __alloc_size(1);
#define kmalloc_node_track_caller_noprof(size, flags, node, caller) \
	__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller)
#define kmalloc_node_track_caller(...)		\
	alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_))

@@ -756,7 +785,7 @@ static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_
		return NULL;
	if (__builtin_constant_p(n) && __builtin_constant_p(size))
		return kmalloc_node_noprof(bytes, flags, node);
	return __kmalloc_node_noprof(bytes, flags, node);
	return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node);
}
#define kmalloc_array_node(...)			alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__))

@@ -780,7 +809,9 @@ static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags)
#define kzalloc(...)				alloc_hooks(kzalloc_noprof(__VA_ARGS__))
#define kzalloc_node(_size, _flags, _node)	kmalloc_node(_size, (_flags)|__GFP_ZERO, _node)

extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_size(1);
void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1);
#define kvmalloc_node_noprof(size, flags, node)	\
	__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node)
#define kvmalloc_node(...)			alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__))

#define kvmalloc(_size, _flags)			kvmalloc_node(_size, _flags, NUMA_NO_NODE)
@@ -788,6 +819,8 @@ extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_si
#define kvzalloc(_size, _flags)			kvmalloc(_size, (_flags)|__GFP_ZERO)

#define kvzalloc_node(_size, _flags, _node)	kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
#define kmem_buckets_valloc(_b, _size, _flags)	\
	alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))

static inline __alloc_size(1, 2) void *
kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
Loading