Commit 8dabe34b authored by Amery Hung, committed by Martin KaFai Lau
Browse files

bpf: Change local_storage->lock and b->lock to rqspinlock



Change bpf_local_storage::lock and bpf_local_storage_map_bucket::lock
from raw_spin_lock to rqspinlock.

Finally, propagate errors from raw_res_spin_lock_irqsave() to syscall
return or BPF helper return.

In bpf_local_storage_destroy(), ignore the return value of
raw_res_spin_lock_irqsave() for now. A later patch will handle errors
correctly in bpf_local_storage_destroy() so that it can
unlink selems even when failing to acquire locks.

For __bpf_local_storage_insert_cache(), instead of handling the error,
skip updating the cache.

Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Amery Hung <ameryhung@gmail.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://patch.msgid.link/20260205222916.1788211-6-ameryhung@gmail.com
parent 403e935f
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -15,12 +15,13 @@
#include <linux/types.h>
#include <linux/bpf_mem_alloc.h>
#include <uapi/linux/btf.h>
#include <asm/rqspinlock.h>

#define BPF_LOCAL_STORAGE_CACHE_SIZE	16

struct bpf_local_storage_map_bucket {
	struct hlist_head list;
	raw_spinlock_t lock;
	rqspinlock_t lock;
};

/* The map is not the primary owner of a bpf_local_storage_elem.
@@ -94,7 +95,7 @@ struct bpf_local_storage {
				 * bpf_local_storage_elem.
				 */
	struct rcu_head rcu;
	raw_spinlock_t lock;	/* Protect adding/removing from the "list" */
	rqspinlock_t lock;	/* Protect adding/removing from the "list" */
	bool use_kmalloc_nolock;
};

+44 −20
Original line number Diff line number Diff line
@@ -321,14 +321,18 @@ static int bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
	struct bpf_local_storage_map *smap;
	struct bpf_local_storage_map_bucket *b;
	unsigned long flags;
	int err;

	local_storage = rcu_dereference_check(selem->local_storage,
					      bpf_rcu_lock_held());
	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
	b = select_bucket(smap, local_storage);
	raw_spin_lock_irqsave(&b->lock, flags);
	err = raw_res_spin_lock_irqsave(&b->lock, flags);
	if (err)
		return err;

	hlist_del_init_rcu(&selem->map_node);
	raw_spin_unlock_irqrestore(&b->lock, flags);
	raw_res_spin_unlock_irqrestore(&b->lock, flags);

	return 0;
}
@@ -344,11 +348,16 @@ int bpf_selem_link_map(struct bpf_local_storage_map *smap,
{
	struct bpf_local_storage_map_bucket *b;
	unsigned long flags;
	int err;

	b = select_bucket(smap, local_storage);
	raw_spin_lock_irqsave(&b->lock, flags);

	err = raw_res_spin_lock_irqsave(&b->lock, flags);
	if (err)
		return err;

	hlist_add_head_rcu(&selem->map_node, &b->list);
	raw_spin_unlock_irqrestore(&b->lock, flags);
	raw_res_spin_unlock_irqrestore(&b->lock, flags);

	return 0;
}
@@ -365,7 +374,7 @@ int bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
	bool free_local_storage = false;
	HLIST_HEAD(selem_free_list);
	unsigned long flags;
	int err = 0;
	int err;

	if (unlikely(!selem_linked_to_storage_lockless(selem)))
		/* selem has already been unlinked from sk */
@@ -374,7 +383,10 @@ int bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
	local_storage = rcu_dereference_check(selem->local_storage,
					      bpf_rcu_lock_held());

	raw_spin_lock_irqsave(&local_storage->lock, flags);
	err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
	if (err)
		return err;

	if (likely(selem_linked_to_storage(selem))) {
		/* Always unlink from map before unlinking from local_storage
		 * because selem will be freed after successfully unlinked from
@@ -388,7 +400,7 @@ int bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
			local_storage, selem, &selem_free_list);
	}
out:
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
	raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);

	bpf_selem_free_list(&selem_free_list, reuse_now);

@@ -403,16 +415,20 @@ void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
				      struct bpf_local_storage_elem *selem)
{
	unsigned long flags;
	int err;

	/* spinlock is needed to avoid racing with the
	 * parallel delete.  Otherwise, publishing an already
	 * deleted sdata to the cache will become a use-after-free
	 * problem in the next bpf_local_storage_lookup().
	 */
	raw_spin_lock_irqsave(&local_storage->lock, flags);
	err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
	if (err)
		return;

	if (selem_linked_to_storage(selem))
		rcu_assign_pointer(local_storage->cache[smap->cache_idx], SDATA(selem));
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
	raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
}

static int check_flags(const struct bpf_local_storage_data *old_sdata,
@@ -457,14 +473,17 @@ int bpf_local_storage_alloc(void *owner,

	RCU_INIT_POINTER(storage->smap, smap);
	INIT_HLIST_HEAD(&storage->list);
	raw_spin_lock_init(&storage->lock);
	raw_res_spin_lock_init(&storage->lock);
	storage->owner = owner;
	storage->use_kmalloc_nolock = smap->use_kmalloc_nolock;

	bpf_selem_link_storage_nolock(storage, first_selem);

	b = select_bucket(smap, storage);
	raw_spin_lock_irqsave(&b->lock, flags);
	err = raw_res_spin_lock_irqsave(&b->lock, flags);
	if (err)
		goto uncharge;

	bpf_selem_link_map_nolock(b, first_selem);

	owner_storage_ptr =
@@ -482,11 +501,11 @@ int bpf_local_storage_alloc(void *owner,
	prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
	if (unlikely(prev_storage)) {
		bpf_selem_unlink_map_nolock(first_selem);
		raw_spin_unlock_irqrestore(&b->lock, flags);
		raw_res_spin_unlock_irqrestore(&b->lock, flags);
		err = -EAGAIN;
		goto uncharge;
	}
	raw_spin_unlock_irqrestore(&b->lock, flags);
	raw_res_spin_unlock_irqrestore(&b->lock, flags);

	return 0;

@@ -569,7 +588,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
	if (!alloc_selem)
		return ERR_PTR(-ENOMEM);

	raw_spin_lock_irqsave(&local_storage->lock, flags);
	err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
	if (err)
		goto free_selem;

	/* Recheck local_storage->list under local_storage->lock */
	if (unlikely(hlist_empty(&local_storage->list))) {
@@ -596,7 +617,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,

	b = select_bucket(smap, local_storage);

	raw_spin_lock_irqsave(&b->lock, b_flags);
	err = raw_res_spin_lock_irqsave(&b->lock, b_flags);
	if (err)
		goto unlock;

	alloc_selem = NULL;
	/* First, link the new selem to the map */
@@ -612,9 +635,10 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
						&old_selem_free_list);
	}

	raw_spin_unlock_irqrestore(&b->lock, b_flags);
	raw_res_spin_unlock_irqrestore(&b->lock, b_flags);
unlock:
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
	raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
free_selem:
	bpf_selem_free_list(&old_selem_free_list, false);
	if (alloc_selem) {
		mem_uncharge(smap, owner, smap->elem_size);
@@ -699,7 +723,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
	 * when unlinking elem from the local_storage->list and
	 * the map's bucket->list.
	 */
	raw_spin_lock_irqsave(&local_storage->lock, flags);
	raw_res_spin_lock_irqsave(&local_storage->lock, flags);
	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
		/* Always unlink from map before unlinking from
		 * local_storage.
@@ -714,7 +738,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
		free_storage = bpf_selem_unlink_storage_nolock(
			local_storage, selem, &free_selem_list);
	}
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
	raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);

	bpf_selem_free_list(&free_selem_list, true);

@@ -761,7 +785,7 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,

	for (i = 0; i < nbuckets; i++) {
		INIT_HLIST_HEAD(&smap->buckets[i].list);
		raw_spin_lock_init(&smap->buckets[i].lock);
		raw_res_spin_lock_init(&smap->buckets[i].lock);
	}

	smap->elem_size = offsetof(struct bpf_local_storage_elem,