Commit af92793e authored by Alexei Starovoitov, committed by Vlastimil Babka
Browse files

slab: Introduce kmalloc_nolock() and kfree_nolock().



kmalloc_nolock() relies on the ability of local_trylock_t to detect
the situation when the per-cpu kmem_cache is locked.

In !PREEMPT_RT local_(try)lock_irqsave(&s->cpu_slab->lock, flags)
disables IRQs and marks s->cpu_slab->lock as acquired.
local_lock_is_locked(&s->cpu_slab->lock) returns true when
slab is in the middle of manipulating per-cpu cache
of that specific kmem_cache.

kmalloc_nolock() can be called from any context and can re-enter
into ___slab_alloc():
  kmalloc() -> ___slab_alloc(cache_A) -> irqsave -> NMI -> bpf ->
    kmalloc_nolock() -> ___slab_alloc(cache_B)
or
  kmalloc() -> ___slab_alloc(cache_A) -> irqsave -> tracepoint/kprobe -> bpf ->
    kmalloc_nolock() -> ___slab_alloc(cache_B)

Hence the caller of ___slab_alloc() checks if &s->cpu_slab->lock
can be acquired without a deadlock before invoking the function.
If that specific per-cpu kmem_cache is busy, kmalloc_nolock()
retries in a different kmalloc bucket. The second attempt will
likely succeed, since this CPU has locked a different kmem_cache.

Similarly, in PREEMPT_RT local_lock_is_locked() returns true when
per-cpu rt_spin_lock is locked by current _task_. In this case
re-entrance into the same kmalloc bucket is unsafe, and
kmalloc_nolock() tries a different bucket that is most likely
not locked by the current task. Though it may be locked by a
different task it's safe to rt_spin_lock() and sleep on it.

Similar to alloc_pages_nolock() the kmalloc_nolock() returns NULL
immediately if called from hard irq or NMI in PREEMPT_RT.

kfree_nolock() defers freeing to irq_work when local_lock_is_locked()
and (in_nmi() or in PREEMPT_RT).

SLUB_TINY config doesn't use local_lock_is_locked() and relies on
spin_trylock_irqsave(&n->list_lock) to allocate,
while kfree_nolock() always defers to irq_work.

Note, kfree_nolock() must be called _only_ for objects allocated
with kmalloc_nolock(). Debug checks (like kmemleak and kfence)
were skipped on allocation, hence "obj = kmalloc(); kfree_nolock(obj);"
will miss kmemleak/kfence bookkeeping and will cause false positives.
large_kmalloc is not supported by either kmalloc_nolock()
or kfree_nolock().

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
parent 76128331
Loading
Loading
Loading
Loading
+8 −5
Original line number Diff line number Diff line
@@ -200,7 +200,7 @@ static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s,
}

bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init,
		       bool still_accessible);
		       bool still_accessible, bool no_quarantine);
/**
 * kasan_slab_free - Poison, initialize, and quarantine a slab object.
 * @object: Object to be freed.
@@ -227,10 +227,12 @@ bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init,
 */
static __always_inline bool kasan_slab_free(struct kmem_cache *s,
					    void *object, bool init,
						bool still_accessible)
					    bool still_accessible,
					    bool no_quarantine)
{
	if (kasan_enabled())
		return __kasan_slab_free(s, object, init, still_accessible);
		return __kasan_slab_free(s, object, init, still_accessible,
					 no_quarantine);
	return false;
}

@@ -427,7 +429,8 @@ static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object)
}

static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
				   bool init, bool still_accessible)
				   bool init, bool still_accessible,
				   bool no_quarantine)
{
	return false;
}
+2 −0
Original line number Diff line number Diff line
@@ -358,6 +358,8 @@ enum objext_flags {
	 * MEMCG_DATA_OBJEXTS.
	 */
	OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL,
	/* slabobj_ext vector allocated with kmalloc_nolock() */
	OBJEXTS_NOSPIN_ALLOC = __FIRST_OBJEXT_FLAG,
	/* the next bit after the last actual flag */
	__NR_OBJEXTS_FLAGS  = (__FIRST_OBJEXT_FLAG << 1),
};
+4 −0
Original line number Diff line number Diff line
@@ -501,6 +501,7 @@ void * __must_check krealloc_noprof(const void *objp, size_t new_size,
#define krealloc(...)				alloc_hooks(krealloc_noprof(__VA_ARGS__))

void kfree(const void *objp);
void kfree_nolock(const void *objp);
void kfree_sensitive(const void *objp);
size_t __ksize(const void *objp);

@@ -957,6 +958,9 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
}
#define kmalloc(...)				alloc_hooks(kmalloc_noprof(__VA_ARGS__))

void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node);
#define kmalloc_nolock(...)			alloc_hooks(kmalloc_nolock_noprof(__VA_ARGS__))

#define kmem_buckets_alloc(_b, _size, _flags)	\
	alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))

+1 −0
Original line number Diff line number Diff line
@@ -194,6 +194,7 @@ menu "Slab allocator options"

config SLUB
	def_bool y
	select IRQ_WORK

config KVFREE_RCU_BATCHED
	def_bool y
+4 −1
Original line number Diff line number Diff line
@@ -252,7 +252,7 @@ bool __kasan_slab_pre_free(struct kmem_cache *cache, void *object,
}

bool __kasan_slab_free(struct kmem_cache *cache, void *object, bool init,
		       bool still_accessible)
		       bool still_accessible, bool no_quarantine)
{
	if (!kasan_arch_is_ready() || is_kfence_address(object))
		return false;
@@ -274,6 +274,9 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object, bool init,

	poison_slab_object(cache, object, init);

	if (no_quarantine)
		return false;

	/*
	 * If the object is put into quarantine, do not let slab put the object
	 * onto the freelist for now. The object's metadata is kept until the
Loading