Commit e06635e2 authored by Linus Torvalds
Browse files
Pull slab updates from Vlastimil Babka:

 - Add new slab_strict_numa boot parameter to enforce per-object memory
   policies on top of slab folio policies, for systems where saving cost
   of remote accesses is more important than minimizing slab allocation
   overhead (Christoph Lameter)

 - Fix for freeptr_offset alignment check being too strict for m68k
   (Geert Uytterhoeven)

 - Fixes so that krealloc() no longer violates __GFP_ZERO guarantees
   when slub_debug (redzone and object tracking) is enabled
   (Feng Tang)

 - Fix a memory leak in case sysfs registration fails for a slab cache,
   and also no longer fail to create the cache in that case (Hyeonggon
   Yoo)

 - Fix handling of detected consistency problems (due to buggy slab
   user) with slub_debug enabled, so that it does not cause further list
   corruption bugs (yuan.gao)

 - Code cleanup and kerneldocs polishing (Zhen Lei, Vlastimil Babka)

* tag 'slab-for-6.13-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  slab: Fix too strict alignment check in create_cache()
  mm/slab: Allow cache creation to proceed even if sysfs registration fails
  mm/slub: Avoid list corruption when removing a slab from the full list
  mm/slub, kunit: Add testcase for krealloc redzone and zeroing
  mm/slub: Improve redzone check and zeroing for krealloc()
  mm/slub: Consider kfence case for get_orig_size()
  SLUB: Add support for per object memory policies
  mm, slab: add kerneldocs for common SLAB_ flags
  mm/slab: remove duplicate check in create_cache()
  mm/slub: Move krealloc() and related code to slub.c
  mm/kasan: Don't store metadata inside kmalloc object when slub_debug_orig_size is on
parents f5f4745a 9008fe8f
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -6158,6 +6158,16 @@
			For more information see Documentation/mm/slub.rst.
			(slub_nomerge legacy name also accepted for now)

	slab_strict_numa	[MM]
			Support memory policies on a per object level
			in the slab allocator. The default is for memory
			policies to be applied at the folio level when
			a new folio is needed or a partial folio is
			retrieved from the lists. Increases overhead
			in the slab fastpaths but gains more accurate
			NUMA kernel object placement which helps with slow
			interconnects in NUMA systems.

	slram=		[HW,MTD]

	smart2=		[HW]
+9 −0
Original line number Diff line number Diff line
@@ -175,6 +175,15 @@ can be influenced by kernel parameters:
	``slab_max_order`` to 0, which causes the minimum possible order of
	slab allocation.

``slab_strict_numa``
        Enables the application of memory policies on each
        allocation. This results in more accurate placement of
        objects which may result in the reduction of accesses
        to remote nodes. The default is to only apply memory
        policies at the folio level when a new folio is acquired
        or a folio is retrieved from the lists. Enabling this
        option reduces the fastpath performance of the slab allocator.

SLUB Debug output
=================

+41 −19
Original line number Diff line number Diff line
@@ -77,7 +77,17 @@ enum _slab_flag_bits {
#define SLAB_POISON		__SLAB_FLAG_BIT(_SLAB_POISON)
/* Indicate a kmalloc slab */
#define SLAB_KMALLOC		__SLAB_FLAG_BIT(_SLAB_KMALLOC)
/* Align objs on cache lines */
/**
 * define SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries.
 *
 * Sufficiently large objects are aligned on cache line boundary. For object
 * size smaller than a half of cache line size, the alignment is on the half of
 * cache line size. In general, if object size is smaller than 1/2^n of cache
 * line size, the alignment is adjusted to 1/2^n.
 *
 * If explicit alignment is also requested by the respective
 * &struct kmem_cache_args field, the greater of both alignments is applied.
 */
#define SLAB_HWCACHE_ALIGN	__SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
/* Use GFP_DMA memory */
#define SLAB_CACHE_DMA		__SLAB_FLAG_BIT(_SLAB_CACHE_DMA)
@@ -87,8 +97,8 @@ enum _slab_flag_bits {
#define SLAB_STORE_USER		__SLAB_FLAG_BIT(_SLAB_STORE_USER)
/* Panic if kmem_cache_create() fails */
#define SLAB_PANIC		__SLAB_FLAG_BIT(_SLAB_PANIC)
/*
 * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
/**
 * define SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
 *
 * This delays freeing the SLAB page by a grace period, it does _NOT_
 * delay object freeing. This means that if you do kmem_cache_free()
@@ -99,6 +109,8 @@ enum _slab_flag_bits {
 * stays valid, the trick to using this is relying on an independent
 * object validation pass. Something like:
 *
 * ::
 *
 *  begin:
 *   rcu_read_lock();
 *   obj = lockless_lookup(key);
@@ -137,7 +149,6 @@ enum _slab_flag_bits {
 *
 * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
 */
/* Defer freeing slabs to RCU */
#define SLAB_TYPESAFE_BY_RCU	__SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
/* Trace allocations and frees */
#define SLAB_TRACE		__SLAB_FLAG_BIT(_SLAB_TRACE)
@@ -170,7 +181,12 @@ enum _slab_flag_bits {
#else
# define SLAB_FAILSLAB		__SLAB_FLAG_UNUSED
#endif
/* Account to memcg */
/**
 * define SLAB_ACCOUNT - Account allocations to memcg.
 *
 * All object allocations from this cache will be memcg accounted, regardless of
 * __GFP_ACCOUNT being or not being passed to individual allocations.
 */
#ifdef CONFIG_MEMCG
# define SLAB_ACCOUNT		__SLAB_FLAG_BIT(_SLAB_ACCOUNT)
#else
@@ -197,7 +213,13 @@ enum _slab_flag_bits {
#endif

/* The following flags affect the page allocator grouping pages by mobility */
/* Objects are reclaimable */
/**
 * define SLAB_RECLAIM_ACCOUNT - Objects are reclaimable.
 *
 * Use this flag for caches that have an associated shrinker. As a result, slab
 * pages are allocated with __GFP_RECLAIMABLE, which affects grouping pages by
 * mobility, and are accounted in the SReclaimable counter in /proc/meminfo.
 */
#ifndef CONFIG_SLUB_TINY
#define SLAB_RECLAIM_ACCOUNT	__SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT)
#else
+42 −0
Original line number Diff line number Diff line
@@ -192,6 +192,47 @@ static void test_leak_destroy(struct kunit *test)
	KUNIT_EXPECT_EQ(test, 2, slab_errors);
}

/*
 * KUnit case: check that krealloc() with __GFP_ZERO keeps the red zone and
 * the zeroing of an object consistent when the object shrinks, grows within
 * the same slab object, and grows into a bigger object.
 *
 * The test cache is created with a 64-byte object size and the SLAB_KMALLOC,
 * SLAB_STORE_USER and SLAB_RED_ZONE flags. The object starts out as a 48-byte
 * request filled with 0xff, so bytes zeroed later can be told apart from
 * stale data.
 *
 * NOTE(review): neither the initial allocation nor any krealloc() result is
 * checked for NULL before it is dereferenced.
 */
static void test_krealloc_redzone_zeroing(struct kunit *test)
{
	u8 *p;
	int i;
	struct kmem_cache *s = test_kmem_cache_create("TestSlub_krealloc", 64,
				SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE);

	/* Request 48 of the 64 object bytes and mark them all as "used". */
	p = alloc_hooks(__kmalloc_cache_noprof(s, GFP_KERNEL, 48));
	memset(p, 0xff, 48);

	/*
	 * The loops below read bytes beyond the requested size (up to byte 63
	 * of the 64-byte object). KASAN is switched off until the matching
	 * kasan_enable_current() at the end - presumably so that these reads
	 * are not reported; OPTIMIZER_HIDE_VAR(p) presumably keeps the compiler
	 * from reasoning about the allocation behind p. TODO confirm both.
	 */
	kasan_disable_current();
	OPTIMIZER_HIDE_VAR(p);

	/*
	 * Shrink to 40 bytes: every byte from offset 40 up to the end of the
	 * 64-byte object is expected to hold SLUB_RED_ACTIVE.
	 */
	p = krealloc(p, 40, GFP_KERNEL | __GFP_ZERO);
	for (i = 40; i < 64; i++)
		KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE);

	/*
	 * Grow to 56 bytes within the same 64B kmalloc object: the newly
	 * exposed bytes 40..55 must read as zero, and bytes 56..63 must still
	 * hold SLUB_RED_ACTIVE.
	 */
	p = krealloc(p, 56, GFP_KERNEL | __GFP_ZERO);
	for (i = 40; i < 56; i++)
		KUNIT_EXPECT_EQ(test, p[i], 0);
	for (i = 56; i < 64; i++)
		KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE);

	/* Validate the cache and expect that no slab errors were counted. */
	validate_slab_cache(s);
	KUNIT_EXPECT_EQ(test, 0, slab_errors);

	/*
	 * Refill the 56 live bytes with 0xff so the preserved old content can
	 * be distinguished from the zeroed tail after the next krealloc().
	 */
	memset(p, 0xff, 56);
	/*
	 * Grow to 112 bytes, which needs a bigger 128B object: the first 56
	 * bytes must keep their 0xff content and bytes 56..111 must be zero.
	 */
	p = krealloc(p, 112, GFP_KERNEL | __GFP_ZERO);
	for (i = 0; i < 56; i++)
		KUNIT_EXPECT_EQ(test, p[i], 0xff);
	for (i = 56; i < 112; i++)
		KUNIT_EXPECT_EQ(test, p[i], 0);

	/* Release the object, re-enable KASAN, then tear down the test cache. */
	kfree(p);
	kasan_enable_current();
	kmem_cache_destroy(s);
}

static int test_init(struct kunit *test)
{
	slab_errors = 0;
@@ -214,6 +255,7 @@ static struct kunit_case test_cases[] = {
	KUNIT_CASE(test_kmalloc_redzone_access),
	KUNIT_CASE(test_kfree_rcu),
	KUNIT_CASE(test_leak_destroy),
	KUNIT_CASE(test_krealloc_redzone_zeroing),
	{}
};

+5 −2
Original line number Diff line number Diff line
@@ -392,9 +392,12 @@ void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
	 * 1. Object is SLAB_TYPESAFE_BY_RCU, which means that it can
	 *    be touched after it was freed, or
	 * 2. Object has a constructor, which means it's expected to
	 *    retain its content until the next allocation.
	 *    retain its content until the next allocation, or
	 * 3. It is from a kmalloc cache which enables the debug option
	 *    to store original size.
	 */
	if ((cache->flags & SLAB_TYPESAFE_BY_RCU) || cache->ctor) {
	if ((cache->flags & SLAB_TYPESAFE_BY_RCU) || cache->ctor ||
	     slub_debug_orig_size(cache)) {
		cache->kasan_info.free_meta_offset = *size;
		*size += sizeof(struct kasan_free_meta);
		goto free_meta_added;
Loading