Commit 5b5f1efb authored by Paolo Abeni
Browse files
Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Apparently, nf_conntrack_bridge changes the way in which fragments
   are handled, leading to packet drops. From Huajian Yang.

2) Add a selftest to stress the conntrack subsystem, from Florian Westphal.

3) nft_quota depletion is off-by-one byte, Zhongqiu Duan.

4) Rewrite the procfs code that reads the conntrack table to speed it up,
   from Florian Westphal.

5) Two patches to prevent overflow in nft_pipapo lookup table and to
   clamp the maximum bucket size.

6) Update nft_fib selftest to check for loopback packet bypass.
   From Florian Westphal.

netfilter pull request 25-05-06

* tag 'nf-next-25-05-06' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  selftests: netfilter: nft_fib.sh: check lo packets bypass fib lookup
  netfilter: nft_set_pipapo: clamp maximum map bucket size to INT_MAX
  netfilter: nft_set_pipapo: prevent overflow in lookup table allocation
  netfilter: nf_conntrack: speed up reads from nf_conntrack proc file
  netfilter: nft_quota: match correctly when the quota just depleted
  selftests: netfilter: add conntrack stress test
  netfilter: bridge: Move specific fragmented packet to slow_path instead of dropping it
====================

Link: https://patch.msgid.link/20250505234151.228057-1-pablo@netfilter.org


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents fbaeb7b0 fc91d5e6
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -60,19 +60,19 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
		struct ip_fraglist_iter iter;
		struct sk_buff *frag;

		if (first_len - hlen > mtu ||
		    skb_headroom(skb) < ll_rs)
		if (first_len - hlen > mtu)
			goto blackhole;

		if (skb_cloned(skb))
		if (skb_cloned(skb) ||
		    skb_headroom(skb) < ll_rs)
			goto slow_path;

		skb_walk_frags(skb, frag) {
			if (frag->len > mtu ||
			    skb_headroom(frag) < hlen + ll_rs)
			if (frag->len > mtu)
				goto blackhole;

			if (skb_shared(frag))
			if (skb_shared(frag) ||
			    skb_headroom(frag) < hlen + ll_rs)
				goto slow_path;
		}

+6 −6
Original line number Diff line number Diff line
@@ -164,20 +164,20 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
		if (first_len - hlen > mtu)
			goto blackhole;

		if (skb_cloned(skb))
		if (skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag2) {
			if (frag2->len > mtu ||
			    skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
			if (frag2->len > mtu)
				goto blackhole;

			/* Partially cloned skb? */
			if (skb_shared(frag2))
			if (skb_shared(frag2) ||
			    skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path;
		}

+53 −35
Original line number Diff line number Diff line
@@ -98,69 +98,87 @@ struct ct_iter_state {
	struct seq_net_private p;
	struct hlist_nulls_head *hash;
	unsigned int htable_size;
	unsigned int skip_elems;
	unsigned int bucket;
	u_int64_t time_now;
};

static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
static struct nf_conntrack_tuple_hash *ct_get_next(const struct net *net,
						   struct ct_iter_state *st)
{
	struct ct_iter_state *st = seq->private;
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int i;

	for (st->bucket = 0;
	     st->bucket < st->htable_size;
	     st->bucket++) {
		n = rcu_dereference(
			hlist_nulls_first_rcu(&st->hash[st->bucket]));
		if (!is_a_nulls(n))
			return n;
	}
	return NULL;
}
	for (i = st->bucket; i < st->htable_size; i++) {
		unsigned int skip = 0;

static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
				      struct hlist_nulls_node *head)
{
	struct ct_iter_state *st = seq->private;
restart:
		hlist_nulls_for_each_entry_rcu(h, n, &st->hash[i], hnnode) {
			struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
			struct hlist_nulls_node *tmp = n;

	head = rcu_dereference(hlist_nulls_next_rcu(head));
	while (is_a_nulls(head)) {
		if (likely(get_nulls_value(head) == st->bucket)) {
			if (++st->bucket >= st->htable_size)
				return NULL;
		}
		head = rcu_dereference(
			hlist_nulls_first_rcu(&st->hash[st->bucket]));
			if (!net_eq(net, nf_ct_net(ct)))
				continue;

			if (++skip <= st->skip_elems)
				continue;

			/* h should be returned, skip to nulls marker. */
			while (!is_a_nulls(tmp))
				tmp = rcu_dereference(hlist_nulls_next_rcu(tmp));

			/* check if h is still linked to hash[i] */
			if (get_nulls_value(tmp) != i) {
				skip = 0;
				goto restart;
			}
	return head;

			st->skip_elems = skip;
			st->bucket = i;
			return h;
		}

static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
{
	struct hlist_nulls_node *head = ct_get_first(seq);
		skip = 0;
		if (get_nulls_value(n) != i)
			goto restart;

		st->skip_elems = 0;
	}

	if (head)
		while (pos && (head = ct_get_next(seq, head)))
			pos--;
	return pos ? NULL : head;
	st->bucket = i;
	return NULL;
}

static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct ct_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	st->time_now = ktime_get_real_ns();
	rcu_read_lock();

	nf_conntrack_get_ht(&st->hash, &st->htable_size);
	return ct_get_idx(seq, *pos);

	if (*pos == 0) {
		st->skip_elems = 0;
		st->bucket = 0;
	} else if (st->skip_elems) {
		/* resume from last dumped entry */
		st->skip_elems--;
	}

	return ct_get_next(net, st);
}

static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	struct ct_iter_state *st = s->private;
	struct net *net = seq_file_net(s);

	(*pos)++;
	return ct_get_next(s, v);
	return ct_get_next(net, st);
}

static void ct_seq_stop(struct seq_file *s, void *v)
+13 −7
Original line number Diff line number Diff line
@@ -19,10 +19,16 @@ struct nft_quota {
};

static inline bool nft_overquota(struct nft_quota *priv,
				 const struct sk_buff *skb)
				 const struct sk_buff *skb,
				 bool *report)
{
	return atomic64_add_return(skb->len, priv->consumed) >=
	       atomic64_read(&priv->quota);
	u64 consumed = atomic64_add_return(skb->len, priv->consumed);
	u64 quota = atomic64_read(&priv->quota);

	if (report)
		*report = consumed >= quota;

	return consumed > quota;
}

static inline bool nft_quota_invert(struct nft_quota *priv)
@@ -34,7 +40,7 @@ static inline void nft_quota_do_eval(struct nft_quota *priv,
				     struct nft_regs *regs,
				     const struct nft_pktinfo *pkt)
{
	if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv))
	if (nft_overquota(priv, pkt->skb, NULL) ^ nft_quota_invert(priv))
		regs->verdict.code = NFT_BREAK;
}

@@ -51,13 +57,13 @@ static void nft_quota_obj_eval(struct nft_object *obj,
			       const struct nft_pktinfo *pkt)
{
	struct nft_quota *priv = nft_obj_data(obj);
	bool overquota;
	bool overquota, report;

	overquota = nft_overquota(priv, pkt->skb);
	overquota = nft_overquota(priv, pkt->skb, &report);
	if (overquota ^ nft_quota_invert(priv))
		regs->verdict.code = NFT_BREAK;

	if (overquota &&
	if (report &&
	    !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
		nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0,
			       NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC);
+50 −14
Original line number Diff line number Diff line
@@ -663,6 +663,9 @@ static int pipapo_realloc_mt(struct nft_pipapo_field *f,
	    check_add_overflow(rules, extra, &rules_alloc))
		return -EOVERFLOW;

	if (rules_alloc > (INT_MAX / sizeof(*new_mt)))
		return -ENOMEM;

	new_mt = kvmalloc_array(rules_alloc, sizeof(*new_mt), GFP_KERNEL_ACCOUNT);
	if (!new_mt)
		return -ENOMEM;
@@ -683,6 +686,30 @@ static int pipapo_realloc_mt(struct nft_pipapo_field *f,
	return 0;
}


/**
 * lt_calculate_size() - Compute lookup table allocation size, guarding overflow
 * @groups:	Amount of bit groups
 * @bb:		Number of bits grouped together in lookup table buckets
 * @bsize:	Size of each bucket in lookup table, in longs
 *
 * The size is groups * buckets * sizeof(long) * @bsize, plus
 * NFT_PIPAPO_ALIGN_HEADROOM. Results larger than INT_MAX are rejected.
 *
 * Return: number of bytes to allocate, alignment headroom included, or -1 if
 * the computation overflows or the result exceeds INT_MAX
 */
static ssize_t lt_calculate_size(unsigned int groups, unsigned int bb,
				 unsigned int bsize)
{
	/* NOTE(review): this initial product is not overflow-checked;
	 * presumably @groups and the bucket count are small - confirm
	 * against callers.
	 */
	ssize_t size = groups * NFT_PIPAPO_BUCKETS(bb) * sizeof(long);

	/* Short-circuit keeps the order: scale, add headroom, then clamp. */
	if (check_mul_overflow(size, bsize, &size) ||
	    check_add_overflow(size, NFT_PIPAPO_ALIGN_HEADROOM, &size) ||
	    size > INT_MAX)
		return -1;

	return size;
}

/**
 * pipapo_resize() - Resize lookup or mapping table, or both
 * @f:		Field containing lookup and mapping tables
@@ -701,6 +728,7 @@ static int pipapo_resize(struct nft_pipapo_field *f,
	long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p;
	unsigned int new_bucket_size, copy;
	int group, bucket, err;
	ssize_t lt_size;

	if (rules >= NFT_PIPAPO_RULE0_MAX)
		return -ENOSPC;
@@ -719,10 +747,11 @@ static int pipapo_resize(struct nft_pipapo_field *f,
	else
		copy = new_bucket_size;

	new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS(f->bb) *
			  new_bucket_size * sizeof(*new_lt) +
			  NFT_PIPAPO_ALIGN_HEADROOM,
			  GFP_KERNEL);
	lt_size = lt_calculate_size(f->groups, f->bb, new_bucket_size);
	if (lt_size < 0)
		return -ENOMEM;

	new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT);
	if (!new_lt)
		return -ENOMEM;

@@ -907,7 +936,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
{
	unsigned int groups, bb;
	unsigned long *new_lt;
	size_t lt_size;
	ssize_t lt_size;

	lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize *
		  sizeof(*f->lt);
@@ -917,15 +946,17 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
		groups = f->groups * 2;
		bb = NFT_PIPAPO_GROUP_BITS_LARGE_SET;

		lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize *
			  sizeof(*f->lt);
		lt_size = lt_calculate_size(groups, bb, f->bsize);
		if (lt_size < 0)
			return;
	} else if (f->bb == NFT_PIPAPO_GROUP_BITS_LARGE_SET &&
		   lt_size < NFT_PIPAPO_LT_SIZE_LOW) {
		groups = f->groups / 2;
		bb = NFT_PIPAPO_GROUP_BITS_SMALL_SET;

		lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize *
			  sizeof(*f->lt);
		lt_size = lt_calculate_size(groups, bb, f->bsize);
		if (lt_size < 0)
			return;

		/* Don't increase group width if the resulting lookup table size
		 * would exceed the upper size threshold for a "small" set.
@@ -936,7 +967,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
		return;
	}

	new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL_ACCOUNT);
	new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT);
	if (!new_lt)
		return;

@@ -1451,13 +1482,15 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)

	for (i = 0; i < old->field_count; i++) {
		unsigned long *new_lt;
		ssize_t lt_size;

		memcpy(dst, src, offsetof(struct nft_pipapo_field, lt));

		new_lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS(src->bb) *
				  src->bsize * sizeof(*dst->lt) +
				  NFT_PIPAPO_ALIGN_HEADROOM,
				  GFP_KERNEL_ACCOUNT);
		lt_size = lt_calculate_size(src->groups, src->bb, src->bsize);
		if (lt_size < 0)
			goto out_lt;

		new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT);
		if (!new_lt)
			goto out_lt;

@@ -1469,6 +1502,9 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
		       src->groups * NFT_PIPAPO_BUCKETS(src->bb));

		if (src->rules > 0) {
			if (src->rules_alloc > (INT_MAX / sizeof(*src->mt)))
				goto out_mt;

			dst->mt = kvmalloc_array(src->rules_alloc,
						 sizeof(*src->mt),
						 GFP_KERNEL_ACCOUNT);
Loading