Commit b676ac48 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull bpf fixes from Alexei Starovoitov:

 - Followup fixes for resilient spinlock (Kumar Kartikeya Dwivedi):
     - Make res_spin_lock test less verbose, since it was spamming BPF
       CI on failure, and make the check for AA deadlock stronger
     - Fix rebasing mistake and use architecture provided
       res_smp_cond_load_acquire
     - Convert BPF maps (queue_stack and ringbuf) to resilient spinlock
       to address long standing syzbot reports

 - Make sure that classic BPF load instruction from SKF_[NET|LL]_OFF
   offsets works when skb is fragmented (Willem de Bruijn)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf: Convert ringbuf map to rqspinlock
  bpf: Convert queue_stack map to rqspinlock
  bpf: Use architecture provided res_smp_cond_load_acquire
  selftests/bpf: Make res_spin_lock AA test condition stronger
  selftests/net: test sk_filter support for SKF_NET_OFF on frags
  bpf: support SKF_NET_OFF and SKF_LL_OFF on skb frags
  selftests/bpf: Make res_spin_lock test less verbose
parents ecd5d67a a650d389
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -86,7 +86,7 @@

#endif

#define res_smp_cond_load_acquire_timewait(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)
#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)

#include <asm-generic/rqspinlock.h>

+12 −23
Original line number Diff line number Diff line
@@ -9,13 +9,14 @@
#include <linux/slab.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"
#include <asm/rqspinlock.h>

#define QUEUE_STACK_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)

struct bpf_queue_stack {
	struct bpf_map map;
	raw_spinlock_t lock;
	rqspinlock_t lock;
	u32 head, tail;
	u32 size; /* max_entries + 1 */

@@ -78,7 +79,7 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)

	qs->size = size;

	raw_spin_lock_init(&qs->lock);
	raw_res_spin_lock_init(&qs->lock);

	return &qs->map;
}
@@ -98,12 +99,8 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
	int err = 0;
	void *ptr;

	if (in_nmi()) {
		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
	if (raw_res_spin_lock_irqsave(&qs->lock, flags))
		return -EBUSY;
	} else {
		raw_spin_lock_irqsave(&qs->lock, flags);
	}

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
@@ -120,7 +117,7 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
	}

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	raw_res_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}

@@ -133,12 +130,8 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
	void *ptr;
	u32 index;

	if (in_nmi()) {
		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
	if (raw_res_spin_lock_irqsave(&qs->lock, flags))
		return -EBUSY;
	} else {
		raw_spin_lock_irqsave(&qs->lock, flags);
	}

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
@@ -157,7 +150,7 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
		qs->head = index;

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	raw_res_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}

@@ -203,12 +196,8 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
		return -EINVAL;

	if (in_nmi()) {
		if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags))
	if (raw_res_spin_lock_irqsave(&qs->lock, irq_flags))
		return -EBUSY;
	} else {
		raw_spin_lock_irqsave(&qs->lock, irq_flags);
	}

	if (queue_stack_map_is_full(qs)) {
		if (!replace) {
@@ -227,7 +216,7 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
		qs->head = 0;

out:
	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
	raw_res_spin_unlock_irqrestore(&qs->lock, irq_flags);
	return err;
}

+7 −10
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#include <linux/kmemleak.h>
#include <uapi/linux/btf.h>
#include <linux/btf_ids.h>
#include <asm/rqspinlock.h>

#define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE)

@@ -29,7 +30,7 @@ struct bpf_ringbuf {
	u64 mask;
	struct page **pages;
	int nr_pages;
	raw_spinlock_t spinlock ____cacheline_aligned_in_smp;
	rqspinlock_t spinlock ____cacheline_aligned_in_smp;
	/* For user-space producer ring buffers, an atomic_t busy bit is used
	 * to synchronize access to the ring buffers in the kernel, rather than
	 * the spinlock that is used for kernel-producer ring buffers. This is
@@ -173,7 +174,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
	if (!rb)
		return NULL;

	raw_spin_lock_init(&rb->spinlock);
	raw_res_spin_lock_init(&rb->spinlock);
	atomic_set(&rb->busy, 0);
	init_waitqueue_head(&rb->waitq);
	init_irq_work(&rb->work, bpf_ringbuf_notify);
@@ -416,12 +417,8 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)

	cons_pos = smp_load_acquire(&rb->consumer_pos);

	if (in_nmi()) {
		if (!raw_spin_trylock_irqsave(&rb->spinlock, flags))
	if (raw_res_spin_lock_irqsave(&rb->spinlock, flags))
		return NULL;
	} else {
		raw_spin_lock_irqsave(&rb->spinlock, flags);
	}

	pend_pos = rb->pending_pos;
	prod_pos = rb->producer_pos;
@@ -446,7 +443,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
	 */
	if (new_prod_pos - cons_pos > rb->mask ||
	    new_prod_pos - pend_pos > rb->mask) {
		raw_spin_unlock_irqrestore(&rb->spinlock, flags);
		raw_res_spin_unlock_irqrestore(&rb->spinlock, flags);
		return NULL;
	}

@@ -458,7 +455,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
	/* pairs with consumer's smp_load_acquire() */
	smp_store_release(&rb->producer_pos, new_prod_pos);

	raw_spin_unlock_irqrestore(&rb->spinlock, flags);
	raw_res_spin_unlock_irqrestore(&rb->spinlock, flags);

	return (void *)hdr + BPF_RINGBUF_HDR_SZ;
}
+1 −1
Original line number Diff line number Diff line
@@ -253,7 +253,7 @@ static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
	})
#else
#define RES_CHECK_TIMEOUT(ts, ret, mask)			      \
	({ (ret) = check_timeout(&(ts)); })
	({ (ret) = check_timeout((lock), (mask), &(ts)); })
#endif

/*
+44 −36
Original line number Diff line number Diff line
@@ -218,23 +218,35 @@ BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
	return 0;
}

/*
 * Rebase a load offset that may use the negative SKF_NET_OFF / SKF_LL_OFF
 * ranges.
 *
 * Non-negative offsets are returned untouched.  A negative offset at or
 * above SKF_NET_OFF is rebased onto skb_network_offset(); one at or above
 * SKF_LL_OFF is rebased onto skb_mac_offset(), but only when the mac
 * header has been set.  Any other negative offset yields INT_MIN, which
 * serves as the "invalid offset" sentinel.
 */
static int bpf_skb_load_helper_convert_offset(const struct sk_buff *skb, int offset)
{
	int rel;

	/* Fast path: ordinary offset, nothing to translate. */
	if (likely(offset >= 0))
		return offset;

	if (offset >= SKF_NET_OFF) {
		/* Distance past the SKF_NET_OFF base, applied to the network offset. */
		rel = offset - SKF_NET_OFF;
		return rel + skb_network_offset(skb);
	}

	if (offset < SKF_LL_OFF)
		return INT_MIN;

	/* The SKF_LL_OFF range is only usable once the mac header is set. */
	if (!skb_mac_header_was_set(skb))
		return INT_MIN;

	rel = offset - SKF_LL_OFF;
	return rel + skb_mac_offset(skb);
}

BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	u8 tmp, *ptr;
	u8 tmp;
	const int len = sizeof(tmp);

	if (offset >= 0) {
	offset = bpf_skb_load_helper_convert_offset(skb, offset);
	if (offset == INT_MIN)
		return -EFAULT;

	if (headlen - offset >= len)
		return *(u8 *)(data + offset);
	if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
		return tmp;
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return *(u8 *)ptr;
	}

	else
		return -EFAULT;
}

@@ -248,20 +260,18 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	__be16 tmp, *ptr;
	__be16 tmp;
	const int len = sizeof(tmp);

	if (offset >= 0) {
	offset = bpf_skb_load_helper_convert_offset(skb, offset);
	if (offset == INT_MIN)
		return -EFAULT;

	if (headlen - offset >= len)
		return get_unaligned_be16(data + offset);
	if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
		return be16_to_cpu(tmp);
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return get_unaligned_be16(ptr);
	}

	else
		return -EFAULT;
}

@@ -275,20 +285,18 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	__be32 tmp, *ptr;
	__be32 tmp;
	const int len = sizeof(tmp);

	if (likely(offset >= 0)) {
	offset = bpf_skb_load_helper_convert_offset(skb, offset);
	if (offset == INT_MIN)
		return -EFAULT;

	if (headlen - offset >= len)
		return get_unaligned_be32(data + offset);
	if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
		return be32_to_cpu(tmp);
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return get_unaligned_be32(ptr);
	}

	else
		return -EFAULT;
}

Loading