Commit 2e9a5480, authored by Namhyung Kim, committed by Alexei Starovoitov
Browse files

bpf: Add open coded version of kmem_cache iterator



Add a new open coded iterator for kmem_cache which can be called from a
BPF program like below.  It doesn't take any argument and traverses all
kmem_cache entries.

  struct kmem_cache *pos;

  bpf_for_each(kmem_cache, pos) {
      ...
  }

As it needs to grab slab_mutex, it should be called from sleepable BPF
programs only.

Also update the existing iterator code to use the open coded version
internally as suggested by Andrii.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241030222819.1800667-1-namhyung@kernel.org


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent e626a13f
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -3112,6 +3112,9 @@ BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_get_kmem_cache)
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
BTF_KFUNCS_END(common_btf_ids)

static const struct btf_kfunc_id_set common_kfunc_set = {
+107 −44
Original line number Diff line number Diff line
@@ -8,16 +8,116 @@

#include "../../mm/slab.h" /* kmem_cache, slab_caches and slab_mutex */

/*
 * Open-coded version.
 *
 * Opaque iterator state taken by the bpf_iter_kmem_cache_{new,next,destroy}()
 * kfuncs. Those kfuncs cast a pointer to this struct to
 * struct bpf_iter_kmem_cache_kern, and bpf_iter_kmem_cache_new() asserts at
 * build time that this struct is at least as large and has the same alignment.
 */
struct bpf_iter_kmem_cache {
	__u64 __opaque[1];
} __attribute__((aligned(8)));

/*
 * Kernel-side view of struct bpf_iter_kmem_cache.
 *
 * @pos is one of:
 *   - KMEM_CACHE_POS_START: set by bpf_iter_kmem_cache_new(), nothing has been
 *     returned yet;
 *   - NULL: bpf_iter_kmem_cache_next() has walked past the last entry;
 *   - otherwise the kmem_cache most recently returned by
 *     bpf_iter_kmem_cache_next().
 */
struct bpf_iter_kmem_cache_kern {
	struct kmem_cache *pos;
} __attribute__((aligned(8)));

/*
 * Sentinel stored in bpf_iter_kmem_cache_kern::pos by bpf_iter_kmem_cache_new()
 * to mean "iteration has not started". It is deliberately non-NULL because
 * bpf_iter_kmem_cache_next() treats a NULL position as "iteration finished".
 * It is never dereferenced; the kfuncs compare against it before touching
 * the pointer.
 */
#define KMEM_CACHE_POS_START  ((void *)1L)

__bpf_kfunc_start_defs();

/**
 * bpf_iter_kmem_cache_new() - initialise an open-coded kmem_cache iterator
 * @it: caller-provided opaque iterator storage
 *
 * Marks the iterator as "not started" so that the first call to
 * bpf_iter_kmem_cache_next() begins at the head of slab_caches. No lock is
 * taken and no reference is acquired here.
 *
 * Return: always 0.
 */
__bpf_kfunc int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it)
{
	struct bpf_iter_kmem_cache_kern *state;

	/* The kernel view must fit inside, and align like, the opaque type. */
	BUILD_BUG_ON(sizeof(struct bpf_iter_kmem_cache_kern) >
		     sizeof(struct bpf_iter_kmem_cache));
	BUILD_BUG_ON(__alignof__(struct bpf_iter_kmem_cache_kern) !=
		     __alignof__(struct bpf_iter_kmem_cache));

	state = (void *)it;
	state->pos = KMEM_CACHE_POS_START;

	return 0;
}

/**
 * bpf_iter_kmem_cache_next() - advance an open-coded kmem_cache iterator
 * @it: iterator previously set up by bpf_iter_kmem_cache_new()
 *
 * Under slab_mutex, picks the entry of slab_caches that follows the current
 * position (or the first entry if iteration has not started), takes a
 * reference on it, and releases the reference held on the previous entry.
 *
 * Return: the next kmem_cache, or NULL once the list is exhausted, when the
 * iterator had already finished, or when slab_caches is empty.
 */
__bpf_kfunc struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it)
{
	struct bpf_iter_kmem_cache_kern *kit = (void *)it;
	struct kmem_cache *cur = kit->pos;
	struct kmem_cache *nxt = NULL;
	bool last_ref = false;

	/* A NULL position means an earlier call already hit the end. */
	if (cur == NULL)
		return NULL;

	mutex_lock(&slab_mutex);

	/* Nothing to walk; the stored position is left untouched. */
	if (list_empty(&slab_caches)) {
		mutex_unlock(&slab_mutex);
		return NULL;
	}

	/* nxt stays NULL when cur is already the tail of the list. */
	if (cur == KMEM_CACHE_POS_START)
		nxt = list_first_entry(&slab_caches, struct kmem_cache, list);
	else if (cur != list_last_entry(&slab_caches, struct kmem_cache, list))
		nxt = list_next_entry(cur, list);

	/* boot_caches have negative refcount, don't touch them */
	if (nxt != NULL && nxt->refcount > 0)
		nxt->refcount++;

	/*
	 * Release the reference on the entry we are leaving. While other
	 * users remain, a plain decrement is enough; if ours is the only
	 * reference left, defer to kmem_cache_destroy() below.
	 */
	if (cur != KMEM_CACHE_POS_START) {
		if (cur->refcount > 1)
			cur->refcount--;
		else if (cur->refcount == 1)
			last_ref = true;
	}

	mutex_unlock(&slab_mutex);

	/* Called only after slab_mutex has been released. */
	if (last_ref)
		kmem_cache_destroy(cur);

	kit->pos = nxt;
	return nxt;
}

/**
 * bpf_iter_kmem_cache_destroy() - tear down an open-coded kmem_cache iterator
 * @it: iterator previously set up by bpf_iter_kmem_cache_new()
 *
 * If the iterator still points at a real kmem_cache (iteration was stopped
 * before reaching the end), release the reference that
 * bpf_iter_kmem_cache_next() took on it. Does nothing when the iterator never
 * started or has already run to completion.
 */
__bpf_kfunc void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it)
{
	struct bpf_iter_kmem_cache_kern *kit = (void *)it;
	struct kmem_cache *held = kit->pos;
	bool last_ref;

	/* No entry is held in either of these states. */
	if (!held || held == KMEM_CACHE_POS_START)
		return;

	mutex_lock(&slab_mutex);

	/*
	 * Skip kmem_cache_destroy() for active entries: only when ours is the
	 * sole remaining reference is the cache handed to it.
	 */
	last_ref = (held->refcount == 1);
	if (held->refcount > 1)
		held->refcount--;

	mutex_unlock(&slab_mutex);

	/* Called only after slab_mutex has been released. */
	if (last_ref)
		kmem_cache_destroy(held);
}

__bpf_kfunc_end_defs();

/*
 * Context object passed to the BPF program by the kmem_cache seq_file
 * iterator (it is filled in and handed to bpf_iter_run_prog()).
 *
 * @meta: iterator metadata; its seq member is set by the seq callbacks.
 * @s:    the kmem_cache being visited; the stop callback runs the program
 *        with this set to NULL.
 */
struct bpf_iter__kmem_cache {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct kmem_cache *, s);
};

/*
 * Private state of the seq_file iterator, reached through seq->private.
 *
 * Both members overlay the same storage: @it is the opaque form passed to
 * the bpf_iter_kmem_cache_*() kfuncs, @kit is the kernel view used to set the
 * current position directly.
 */
union kmem_cache_iter_priv {
	struct bpf_iter_kmem_cache it;
	struct bpf_iter_kmem_cache_kern kit;
};

static void *kmem_cache_iter_seq_start(struct seq_file *seq, loff_t *pos)
{
	loff_t cnt = 0;
	bool found = false;
	struct kmem_cache *s;
	union kmem_cache_iter_priv *p = seq->private;

	mutex_lock(&slab_mutex);

@@ -43,8 +143,9 @@ static void *kmem_cache_iter_seq_start(struct seq_file *seq, loff_t *pos)
	mutex_unlock(&slab_mutex);

	if (!found)
		return NULL;
		s = NULL;

	p->kit.pos = s;
	return s;
}

@@ -55,63 +156,24 @@ static void kmem_cache_iter_seq_stop(struct seq_file *seq, void *v)
		.meta = &meta,
		.s = v,
	};
	union kmem_cache_iter_priv *p = seq->private;
	struct bpf_prog *prog;
	bool destroy = false;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, true);
	if (prog && !ctx.s)
		bpf_iter_run_prog(prog, &ctx);

	if (ctx.s == NULL)
		return;

	mutex_lock(&slab_mutex);

	/* Skip kmem_cache_destroy() for active entries */
	if (ctx.s->refcount > 1)
		ctx.s->refcount--;
	else if (ctx.s->refcount == 1)
		destroy = true;

	mutex_unlock(&slab_mutex);

	if (destroy)
		kmem_cache_destroy(ctx.s);
	bpf_iter_kmem_cache_destroy(&p->it);
}

/*
 * Advance step of the kmem_cache seq_file iterator: increments *pos and
 * yields the slab_caches entry that follows @v, or NULL at the end.
 *
 * NOTE(review): this span comes from a diff view without +/- markers. It
 * appears to contain both the earlier hand-written walk (local refcount
 * handling, "return next") and its replacement that delegates to
 * bpf_iter_kmem_cache_next() on the private iterator state. Read literally,
 * everything after the first return is unreachable -- confirm against the
 * applied tree before relying on this text.
 */
static void *kmem_cache_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct kmem_cache *s = v;
	struct kmem_cache *next = NULL;
	bool destroy = false;
	union kmem_cache_iter_priv *p = seq->private;

	++*pos;

	mutex_lock(&slab_mutex);

	if (list_last_entry(&slab_caches, struct kmem_cache, list) != s) {
		next = list_next_entry(s, list);

		WARN_ON_ONCE(next->refcount == 0);

		/* boot_caches have negative refcount, don't touch them */
		if (next->refcount > 0)
			next->refcount++;
	}

	/* Skip kmem_cache_destroy() for active entries */
	if (s->refcount > 1)
		s->refcount--;
	else if (s->refcount == 1)
		destroy = true;

	mutex_unlock(&slab_mutex);

	if (destroy)
		kmem_cache_destroy(s);

	return next;
	return bpf_iter_kmem_cache_next(&p->it);
}

static int kmem_cache_iter_seq_show(struct seq_file *seq, void *v)
@@ -143,6 +205,7 @@ BTF_ID_LIST_GLOBAL_SINGLE(bpf_kmem_cache_btf_id, struct, kmem_cache)

/*
 * seq_file description for the kmem_cache iterator: the callback table plus
 * the size of the per-seq_file private area. The seq callbacks access that
 * area through seq->private as a union kmem_cache_iter_priv.
 */
static const struct bpf_iter_seq_info kmem_cache_iter_seq_info = {
	.seq_ops		= &kmem_cache_iter_seq_ops,
	.seq_priv_size		= sizeof(union kmem_cache_iter_priv),
};

static void bpf_iter_kmem_cache_show_fdinfo(const struct bpf_iter_aux_info *aux,