Commit e8ec1c94 authored by Alexei Starovoitov
Browse files

Merge branch 'bpf-reduce-the-use-of-migrate_-disable-enable'

Hou Tao says:

====================
The use of migrate_{disable|enable} pair in BPF is mainly due to the
introduction of bpf memory allocator and the use of per-CPU data struct
in its internal implementation. The caller needs to disable migration
before invoking the alloc or free APIs of bpf memory allocator, and
enable migration after the invocation.

The main users of the bpf memory allocator are various kinds of bpf maps
in which the map values or the special fields in the map values are
allocated by using the bpf memory allocator.

At present, the running context for a bpf program has already disabled
migration explicitly or implicitly, therefore, when these maps are
manipulated in a bpf program, it is OK to not invoke the migrate_disable()
and migrate_enable() pair. However, it is not always the case when
these maps are manipulated through the bpf syscall, therefore many
migrate_{disable|enable} pairs are added when the map can either be
manipulated by a BPF program or the BPF syscall.

The initial idea of reducing the use of migrate_{disable|enable} comes
from Alexei [1]. I turned it into a patch set that achieves the goal
through the following three methods:

1. remove unnecessary migrate_{disable|enable} pair
When the BPF syscall path also disables migration, it is OK to remove
the pair. Patches #1~#3 fall into this category, while patches #4~#5 are
partially included.

2. move the migrate_{disable|enable} pair from inner callee to outer
   caller
Instead of invoking migrate_disable() in the inner callee, invoke
migrate_disable() in the outer caller to simplify reasoning about when
migrate_disable() is needed. Patches #4~#5 and patches #6~#19 belong to
this category.

3. add cant_migrate() check in the inner callee
Add cant_migrate() check in the inner callee to ensure the guarantee
that migration is disabled is not broken. Patch #1~#5, #13, #16~#19 also
belong to this category.

Please check the individual patches for more details. Comments are
always welcome.

Change Log:
v2:
  * squash the ->map_free related patches (#10~#12, #15) into one patch
  * remove unnecessary cant_migrate() checks.

v1: https://lore.kernel.org/bpf/20250106081900.1665573-1-houtao@huaweicloud.com
====================

Link: https://patch.msgid.link/20250108010728.207536-1-houtao@huaweicloud.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents bfaac2a0 d86088e2
Loading
Loading
Loading
Loading
+2 −4
Original line number Diff line number Diff line
@@ -735,13 +735,13 @@ static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback
	u64 ret = 0;
	void *val;

	cant_migrate();

	if (flags != 0)
		return -EINVAL;

	is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	array = container_of(map, struct bpf_array, map);
	if (is_percpu)
		migrate_disable();
	for (i = 0; i < map->max_entries; i++) {
		if (is_percpu)
			val = this_cpu_ptr(array->pptrs[i]);
@@ -756,8 +756,6 @@ static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback
			break;
	}

	if (is_percpu)
		migrate_enable();
	return num_elems;
}

+7 −8
Original line number Diff line number Diff line
@@ -15,22 +15,20 @@ static DEFINE_PER_CPU(int, bpf_cgrp_storage_busy);

static void bpf_cgrp_storage_lock(void)
{
	migrate_disable();
	cant_migrate();
	this_cpu_inc(bpf_cgrp_storage_busy);
}

static void bpf_cgrp_storage_unlock(void)
{
	this_cpu_dec(bpf_cgrp_storage_busy);
	migrate_enable();
}

static bool bpf_cgrp_storage_trylock(void)
{
	migrate_disable();
	cant_migrate();
	if (unlikely(this_cpu_inc_return(bpf_cgrp_storage_busy) != 1)) {
		this_cpu_dec(bpf_cgrp_storage_busy);
		migrate_enable();
		return false;
	}
	return true;
@@ -47,17 +45,18 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup)
{
	struct bpf_local_storage *local_storage;

	migrate_disable();
	rcu_read_lock();
	local_storage = rcu_dereference(cgroup->bpf_cgrp_storage);
	if (!local_storage) {
		rcu_read_unlock();
		return;
	}
	if (!local_storage)
		goto out;

	bpf_cgrp_storage_lock();
	bpf_local_storage_destroy(local_storage);
	bpf_cgrp_storage_unlock();
out:
	rcu_read_unlock();
	migrate_enable();
}

static struct bpf_local_storage_data *
+5 −4
Original line number Diff line number Diff line
@@ -62,16 +62,17 @@ void bpf_inode_storage_free(struct inode *inode)
	if (!bsb)
		return;

	migrate_disable();
	rcu_read_lock();

	local_storage = rcu_dereference(bsb->storage);
	if (!local_storage) {
		rcu_read_unlock();
		return;
	}
	if (!local_storage)
		goto out;

	bpf_local_storage_destroy(local_storage);
out:
	rcu_read_unlock();
	migrate_enable();
}

static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)
+9 −21
Original line number Diff line number Diff line
@@ -81,9 +81,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
		return NULL;

	if (smap->bpf_ma) {
		migrate_disable();
		selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags);
		migrate_enable();
		if (selem)
			/* Keep the original bpf_map_kzalloc behavior
			 * before started using the bpf_mem_cache_alloc.
@@ -174,18 +172,15 @@ static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
		return;
	}

	if (smap) {
		migrate_disable();
	if (smap)
		bpf_mem_cache_free(&smap->storage_ma, local_storage);
		migrate_enable();
	} else {
	else
		/* smap could be NULL if the selem that triggered
		 * this 'local_storage' creation had been long gone.
		 * In this case, directly do call_rcu().
		 */
		call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu);
}
}

/* rcu tasks trace callback for bpf_ma == false */
static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
@@ -217,7 +212,10 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
	/* The bpf_local_storage_map_free will wait for rcu_barrier */
	smap = rcu_dereference_check(SDATA(selem)->smap, 1);

	migrate_disable();
	bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
	migrate_enable();
	bpf_mem_cache_raw_free(selem);
}

@@ -256,9 +254,7 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
		 * bpf_mem_cache_free will be able to reuse selem
		 * immediately.
		 */
		migrate_disable();
		bpf_mem_cache_free(&smap->selem_ma, selem);
		migrate_enable();
		return;
	}

@@ -497,15 +493,11 @@ int bpf_local_storage_alloc(void *owner,
	if (err)
		return err;

	if (smap->bpf_ma) {
		migrate_disable();
	if (smap->bpf_ma)
		storage = bpf_mem_cache_alloc_flags(&smap->storage_ma, gfp_flags);
		migrate_enable();
	} else {
	else
		storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
					  gfp_flags | __GFP_NOWARN);
	}

	if (!storage) {
		err = -ENOMEM;
		goto uncharge;
@@ -902,15 +894,11 @@ void bpf_local_storage_map_free(struct bpf_map *map,
		while ((selem = hlist_entry_safe(
				rcu_dereference_raw(hlist_first_rcu(&b->list)),
				struct bpf_local_storage_elem, map_node))) {
			if (busy_counter) {
				migrate_disable();
			if (busy_counter)
				this_cpu_inc(*busy_counter);
			}
			bpf_selem_unlink(selem, true);
			if (busy_counter) {
			if (busy_counter)
				this_cpu_dec(*busy_counter);
				migrate_enable();
			}
			cond_resched_rcu();
		}
		rcu_read_unlock();
+7 −8
Original line number Diff line number Diff line
@@ -24,22 +24,20 @@ static DEFINE_PER_CPU(int, bpf_task_storage_busy);

static void bpf_task_storage_lock(void)
{
	migrate_disable();
	cant_migrate();
	this_cpu_inc(bpf_task_storage_busy);
}

static void bpf_task_storage_unlock(void)
{
	this_cpu_dec(bpf_task_storage_busy);
	migrate_enable();
}

static bool bpf_task_storage_trylock(void)
{
	migrate_disable();
	cant_migrate();
	if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
		this_cpu_dec(bpf_task_storage_busy);
		migrate_enable();
		return false;
	}
	return true;
@@ -72,18 +70,19 @@ void bpf_task_storage_free(struct task_struct *task)
{
	struct bpf_local_storage *local_storage;

	migrate_disable();
	rcu_read_lock();

	local_storage = rcu_dereference(task->bpf_storage);
	if (!local_storage) {
		rcu_read_unlock();
		return;
	}
	if (!local_storage)
		goto out;

	bpf_task_storage_lock();
	bpf_local_storage_destroy(local_storage);
	bpf_task_storage_unlock();
out:
	rcu_read_unlock();
	migrate_enable();
}

static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
Loading