Commit 4a98c2ef, authored by Amery Hung, committed by Martin KaFai Lau
Browse files

bpf: Remove task local storage percpu counter



The percpu counter in task local storage is no longer needed as the
underlying bpf_local_storage can now handle deadlock with the help of
rqspinlock. Remove the percpu counter and related migrate_{disable,
enable}.

Since the percpu counter is removed, merge back bpf_task_storage_get()
and bpf_task_storage_get_recur(). This will allow the bpf syscalls and
helpers to run concurrently on the same CPU, removing the spurious
-EBUSY error. bpf_task_storage_get(..., F_CREATE) will now always
succeed with enough free memory unless being called recursively.

Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Amery Hung <ameryhung@gmail.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://patch.msgid.link/20260205222916.1788211-7-ameryhung@gmail.com
parent 8dabe34b
Loading
Loading
Loading
Loading
+18 −132
Original line number Diff line number Diff line
@@ -20,29 +20,6 @@

DEFINE_BPF_STORAGE_CACHE(task_cache);

static DEFINE_PER_CPU(int, bpf_task_storage_busy);

/*
 * Unconditionally mark task storage as in use on the current CPU by
 * incrementing the per-CPU bpf_task_storage_busy counter. Callers in this
 * file release it again with bpf_task_storage_unlock().
 */
static void bpf_task_storage_lock(void)
{
	/*
	 * NOTE(review): presumably asserts that the caller cannot migrate, so
	 * the matching decrement lands on the same CPU's counter - confirm.
	 */
	cant_migrate();
	this_cpu_inc(bpf_task_storage_busy);
}

/*
 * Decrement the current CPU's bpf_task_storage_busy counter, undoing one
 * earlier increment of that counter.
 */
static void bpf_task_storage_unlock(void)
{
	this_cpu_dec(bpf_task_storage_busy);
}

/*
 * Try to mark task storage as in use on the current CPU.
 *
 * Increments the per-CPU bpf_task_storage_busy counter and inspects the
 * resulting value:
 *   - value == 1: returns true and leaves the counter incremented; the
 *     caller must later call bpf_task_storage_unlock().
 *   - any other value: the increment is undone and false is returned, so
 *     the caller must NOT call bpf_task_storage_unlock().
 */
static bool bpf_task_storage_trylock(void)
{
	cant_migrate();
	if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
		/* Counter was already held on this CPU: roll back our increment. */
		this_cpu_dec(bpf_task_storage_busy);
		return false;
	}
	return true;
}

static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
{
	struct task_struct *task = owner;
@@ -70,17 +47,15 @@ void bpf_task_storage_free(struct task_struct *task)
{
	struct bpf_local_storage *local_storage;

	rcu_read_lock_dont_migrate();
	rcu_read_lock();

	local_storage = rcu_dereference(task->bpf_storage);
	if (!local_storage)
		goto out;

	bpf_task_storage_lock();
	bpf_local_storage_destroy(local_storage);
	bpf_task_storage_unlock();
out:
	rcu_read_unlock_migrate();
	rcu_read_unlock();
}

static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
@@ -106,9 +81,7 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
		goto out;
	}

	bpf_task_storage_lock();
	sdata = task_storage_lookup(task, map, true);
	bpf_task_storage_unlock();
	put_pid(pid);
	return sdata ? sdata->data : NULL;
out:
@@ -143,11 +116,9 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
		goto out;
	}

	bpf_task_storage_lock();
	sdata = bpf_local_storage_update(
		task, (struct bpf_local_storage_map *)map, value, map_flags,
		true, GFP_ATOMIC);
	bpf_task_storage_unlock();

	err = PTR_ERR_OR_ZERO(sdata);
out:
@@ -155,8 +126,7 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
	return err;
}

static int task_storage_delete(struct task_struct *task, struct bpf_map *map,
			       bool nobusy)
static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
{
	struct bpf_local_storage_data *sdata;

@@ -164,9 +134,6 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map,
	if (!sdata)
		return -ENOENT;

	if (!nobusy)
		return -EBUSY;

	return bpf_selem_unlink(SELEM(sdata), false);
}

@@ -192,111 +159,50 @@ static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
		goto out;
	}

	bpf_task_storage_lock();
	err = task_storage_delete(task, map, true);
	bpf_task_storage_unlock();
	err = task_storage_delete(task, map);
out:
	put_pid(pid);
	return err;
}

/* Called by bpf_task_storage_get*() helpers */
static void *__bpf_task_storage_get(struct bpf_map *map,
				    struct task_struct *task, void *value,
				    u64 flags, gfp_t gfp_flags, bool nobusy)
/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
	   task, void *, value, u64, flags, gfp_t, gfp_flags)
{
	struct bpf_local_storage_data *sdata;

	sdata = task_storage_lookup(task, map, nobusy);
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
		return (unsigned long)NULL;

	sdata = task_storage_lookup(task, map, true);
	if (sdata)
		return sdata->data;
		return (unsigned long)sdata->data;

	/* only allocate new storage, when the task is refcounted */
	if (refcount_read(&task->usage) &&
	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) {
	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) {
		sdata = bpf_local_storage_update(
			task, (struct bpf_local_storage_map *)map, value,
			BPF_NOEXIST, false, gfp_flags);
		return IS_ERR(sdata) ? NULL : sdata->data;
	}

	return NULL;
		return IS_ERR(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data;
	}

/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *,
	   task, void *, value, u64, flags, gfp_t, gfp_flags)
{
	bool nobusy;
	void *data;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
	return (unsigned long)NULL;

	nobusy = bpf_task_storage_trylock();
	data = __bpf_task_storage_get(map, task, value, flags,
				      gfp_flags, nobusy);
	if (nobusy)
		bpf_task_storage_unlock();
	return (unsigned long)data;
}

/*
 * Pre-change form of the bpf_task_storage_get helper.
 *
 * Returns NULL (cast to unsigned long) when *flags* has any bit set other
 * than BPF_LOCAL_STORAGE_GET_F_CREATE, or when *task* is NULL. Otherwise it
 * calls __bpf_task_storage_get() with nobusy == true, bracketed by
 * bpf_task_storage_lock()/bpf_task_storage_unlock(), and returns the
 * resulting pointer cast to unsigned long.
 */
/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
	   task, void *, value, u64, flags, gfp_t, gfp_flags)
{
	void *data;

	/* Warn once if the RCU lock check fails; execution continues either way. */
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
		return (unsigned long)NULL;

	bpf_task_storage_lock();
	data = __bpf_task_storage_get(map, task, value, flags,
				      gfp_flags, true);
	bpf_task_storage_unlock();
	return (unsigned long)data;
}

/*
 * Delete helper variant that uses bpf_task_storage_trylock() instead of the
 * unconditional lock.
 *
 * Returns -EINVAL when *task* is NULL. Otherwise returns whatever
 * task_storage_delete() returns; the trylock result is forwarded to it as
 * *nobusy*.
 */
BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *,
	   task)
{
	bool nobusy;
	int ret;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!task)
		return -EINVAL;

	nobusy = bpf_task_storage_trylock();
	/* This helper must only be called from places where the lifetime of the task
	 * is guaranteed. Either by being refcounted or by being protected
	 * by an RCU read-side critical section.
	 */
	ret = task_storage_delete(task, map, nobusy);
	/* Only release the busy counter if the trylock above actually took it. */
	if (nobusy)
		bpf_task_storage_unlock();
	return ret;
}

BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
	   task)
{
	int ret;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!task)
		return -EINVAL;

	bpf_task_storage_lock();
	/* This helper must only be called from places where the lifetime of the task
	 * is guaranteed. Either by being refcounted or by being protected
	 * by an RCU read-side critical section.
	 */
	ret = task_storage_delete(task, map, true);
	bpf_task_storage_unlock();
	return ret;
	return task_storage_delete(task, map);
}

static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
@@ -311,7 +217,7 @@ static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)

static void task_storage_map_free(struct bpf_map *map)
{
	bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy);
	bpf_local_storage_map_free(map, &task_cache, NULL);
}

BTF_ID_LIST_GLOBAL_SINGLE(bpf_local_storage_map_btf_id, struct, bpf_local_storage_map)
@@ -330,17 +236,6 @@ const struct bpf_map_ops task_storage_map_ops = {
	.map_owner_storage_ptr = task_storage_ptr,
};

/*
 * Helper prototype binding bpf_task_storage_get_recur() to its return and
 * argument type descriptors. bpf_base_func_proto() hands this out for
 * BPF_FUNC_task_storage_get when bpf_prog_check_recur(prog) is true.
 */
const struct bpf_func_proto bpf_task_storage_get_recur_proto = {
	.func = bpf_task_storage_get_recur,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
	/* BTF id constraining arg2 is taken from the task entry of btf_tracing_ids. */
	.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};

const struct bpf_func_proto bpf_task_storage_get_proto = {
	.func = bpf_task_storage_get,
	.gpl_only = false,
@@ -352,15 +247,6 @@ const struct bpf_func_proto bpf_task_storage_get_proto = {
	.arg4_type = ARG_ANYTHING,
};

/*
 * Helper prototype binding bpf_task_storage_delete_recur() to its return and
 * argument type descriptors. bpf_base_func_proto() hands this out for
 * BPF_FUNC_task_storage_delete when bpf_prog_check_recur(prog) is true.
 */
const struct bpf_func_proto bpf_task_storage_delete_recur_proto = {
	.func = bpf_task_storage_delete_recur,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
	/* BTF id constraining arg2 is taken from the task entry of btf_tracing_ids. */
	.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
};

const struct bpf_func_proto bpf_task_storage_delete_proto = {
	.func = bpf_task_storage_delete,
	.gpl_only = false,
+0 −4
Original line number Diff line number Diff line
@@ -2167,12 +2167,8 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
		return &bpf_get_cgroup_classid_curr_proto;
#endif
	case BPF_FUNC_task_storage_get:
		if (bpf_prog_check_recur(prog))
			return &bpf_task_storage_get_recur_proto;
		return &bpf_task_storage_get_proto;
	case BPF_FUNC_task_storage_delete:
		if (bpf_prog_check_recur(prog))
			return &bpf_task_storage_delete_recur_proto;
		return &bpf_task_storage_delete_proto;
	default:
		break;