Commit 624d7a8a authored by Paolo Abeni
Browse files
Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains a small batch of Netfilter/IPVS updates
for net-next:

1) Remove unused genmask parameter in nf_tables_addchain()

2) Speed up reads from /proc/net/ip_vs_conn, from Florian Westphal.

3) Skip empty buckets in hashlimit to avoid atomic operations that result
   in false positive reports by syzbot with lockdep enabled, patch from
   Eric Dumazet.

4) Add conntrack event timestamps available via ctnetlink,
   from Florian Westphal.

netfilter pull request 25-01-11

* tag 'nf-next-25-01-11' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: conntrack: add conntrack event timestamp
  netfilter: xt_hashlimit: htable_selective_cleanup() optimization
  ipvs: speed up reads from ip_vs_conn proc file
  netfilter: nf_tables: remove the genmask parameter
====================

Link: https://patch.msgid.link/20250111230800.67349-1-pablo@netfilter.org


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents a8d00668 601731fc
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <asm/local64.h>

enum nf_ct_ecache_state {
	NFCT_ECACHE_DESTROY_FAIL,	/* tried but failed to send destroy event */
@@ -20,6 +21,9 @@ enum nf_ct_ecache_state {

struct nf_conntrack_ecache {
	unsigned long cache;		/* bitops want long */
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
	local64_t timestamp;		/* event timestamp, in nanoseconds */
#endif
	u16 ctmask;			/* bitmask of ct events to be delivered */
	u16 expmask;			/* bitmask of expect events to be delivered */
	u32 missed;			/* missed events */
@@ -108,6 +112,14 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
	if (e == NULL)
		return;

#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
	/* renew only if this is the first cached event, so that the
	 * timestamp reflects the first, not the last, generated event.
	 */
	if (local64_read(&e->timestamp) && READ_ONCE(e->cache) == 0)
		local64_set(&e->timestamp, ktime_get_real_ns());
#endif

	set_bit(event, &e->cache);
#endif
}
+1 −0
Original line number Diff line number Diff line
@@ -57,6 +57,7 @@ enum ctattr_type {
	CTA_SYNPROXY,
	CTA_FILTER,
	CTA_STATUS_MASK,
	CTA_TIMESTAMP_EVENT,
	__CTA_MAX
};
#define CTA_MAX (__CTA_MAX - 1)
+28 −22
Original line number Diff line number Diff line
@@ -1046,28 +1046,35 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
#ifdef CONFIG_PROC_FS
/* Iteration state kept in seq->private while walking ip_vs_conn_tab. */
struct ip_vs_iter_state {
	struct seq_net_private	p;
	/* bucket head of the entry handed out last; written by the
	 * iter->l assignments visible in this diff
	 */
	struct hlist_head	*l;
	/* ip_vs_conn_tab index at which ip_vs_conn_array() starts scanning */
	unsigned int		bucket;
	/* number of entries ip_vs_conn_array() passes over in that bucket
	 * before it returns one
	 */
	unsigned int		skip_elems;
};

static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
static void *ip_vs_conn_array(struct ip_vs_iter_state *iter)
{
	int idx;
	struct ip_vs_conn *cp;
	struct ip_vs_iter_state *iter = seq->private;

	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
	for (idx = iter->bucket; idx < ip_vs_conn_tab_size; idx++) {
		unsigned int skip = 0;

		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
			/* __ip_vs_conn_get() is not needed by
			 * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show
			 */
			if (pos-- == 0) {
				iter->l = &ip_vs_conn_tab[idx];
			if (skip >= iter->skip_elems) {
				iter->bucket = idx;
				return cp;
			}

			++skip;
		}

		iter->skip_elems = 0;
		cond_resched_rcu();
	}

	iter->bucket = idx;
	return NULL;
}

@@ -1076,9 +1083,14 @@ static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ip_vs_iter_state *iter = seq->private;

	iter->l = NULL;
	rcu_read_lock();
	return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
	if (*pos == 0) {
		iter->skip_elems = 0;
		iter->bucket = 0;
		return SEQ_START_TOKEN;
	}

	return ip_vs_conn_array(iter);
}

static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -1086,28 +1098,22 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
	struct ip_vs_conn *cp = v;
	struct ip_vs_iter_state *iter = seq->private;
	struct hlist_node *e;
	struct hlist_head *l = iter->l;
	int idx;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_conn_array(seq, 0);
		return ip_vs_conn_array(iter);

	/* more on same hash chain? */
	e = rcu_dereference(hlist_next_rcu(&cp->c_list));
	if (e)
	if (e) {
		iter->skip_elems++;
		return hlist_entry(e, struct ip_vs_conn, c_list);

	idx = l - ip_vs_conn_tab;
	while (++idx < ip_vs_conn_tab_size) {
		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
			iter->l = &ip_vs_conn_tab[idx];
			return cp;
		}
		cond_resched_rcu();
	}
	iter->l = NULL;
	return NULL;

	iter->skip_elems = 0;
	iter->bucket++;

	return ip_vs_conn_array(iter);
}

static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
+23 −0
Original line number Diff line number Diff line
@@ -162,6 +162,14 @@ static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
	return ret;
}

/* Overwrite the cached event timestamp with the current wall-clock time
 * from ktime_get_real_ns().
 *
 * A timestamp of zero is left alone: nf_ct_ecache_tstamp_new() stores zero
 * when the conntrack has no NF_CT_EXT_TSTAMP extension, so zero is never
 * replaced here.  Compiles to nothing without CONFIG_NF_CONNTRACK_TIMESTAMP.
 */
static void nf_ct_ecache_tstamp_refresh(struct nf_conntrack_ecache *e)
{
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
	if (!local64_read(&e->timestamp))
		return;

	local64_set(&e->timestamp, ktime_get_real_ns());
#endif
}

int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
				  u32 portid, int report)
{
@@ -186,6 +194,8 @@ int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
	/* Is this a resend of a destroy event? If so, skip missed */
	missed = e->portid ? 0 : e->missed;

	nf_ct_ecache_tstamp_refresh(e);

	ret = __nf_conntrack_eventmask_report(e, events, missed, &item);
	if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) {
		/* This is a destroy event that has been triggered by a process,
@@ -297,6 +307,18 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
	}
}

/* Set the initial event timestamp of ecache extension @e.
 *
 * If @ct carries the NF_CT_EXT_TSTAMP extension the timestamp starts at the
 * current ktime_get_real_ns() value; otherwise it is stored as zero.
 * Compiles to nothing without CONFIG_NF_CONNTRACK_TIMESTAMP.
 */
static void nf_ct_ecache_tstamp_new(const struct nf_conn *ct, struct nf_conntrack_ecache *e)
{
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
	if (nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP)) {
		local64_set(&e->timestamp, ktime_get_real_ns());
		return;
	}

	/* no timestamp extension on this conntrack: store zero */
	local64_set(&e->timestamp, 0);
#endif
}

bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
{
	struct net *net = nf_ct_net(ct);
@@ -326,6 +348,7 @@ bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp

	e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
	if (e) {
		nf_ct_ecache_tstamp_new(ct, e);
		e->ctmask  = ctmask;
		e->expmask = expmask;
	}
+25 −0
Original line number Diff line number Diff line
@@ -383,6 +383,23 @@ static int ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct)
#endif

#ifdef CONFIG_NF_CONNTRACK_EVENTS
/* Append the cached event timestamp of @ct to @skb as CTA_TIMESTAMP_EVENT.
 *
 * Nothing is emitted, and 0 is returned, when @ct has no ecache extension,
 * when the stored timestamp is zero, or when CONFIG_NF_CONNTRACK_TIMESTAMP
 * is not set.  Otherwise the return value is that of nla_put_be64().
 */
static int
ctnetlink_dump_event_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
	const struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
	u64 stamp;

	if (!ecache)
		return 0;

	stamp = local64_read(&ecache->timestamp);
	if (!stamp)
		return 0;

	return nla_put_be64(skb, CTA_TIMESTAMP_EVENT,
			    cpu_to_be64(stamp), CTA_TIMESTAMP_PAD);
#else
	return 0;
#endif
}

static inline int ctnetlink_label_size(const struct nf_conn *ct)
{
	struct nf_conn_labels *labels = nf_ct_labels_find(ct);
@@ -717,6 +734,9 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
#endif
	       + ctnetlink_proto_size(ct)
	       + ctnetlink_label_size(ct)
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
	       + nla_total_size(sizeof(u64)) /* CTA_TIMESTAMP_EVENT */
#endif
	       ;
}

@@ -838,6 +858,10 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
	if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK)))
		goto nla_put_failure;
#endif

	if (ctnetlink_dump_event_timestamp(skb, ct))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	err = nfnetlink_send(skb, net, item->portid, group, item->report,
			     GFP_ATOMIC);
@@ -1557,6 +1581,7 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
				    .len = NF_CT_LABELS_MAX_SIZE },
	[CTA_FILTER]		= { .type = NLA_NESTED },
	[CTA_STATUS_MASK]	= { .type = NLA_U32 },
	[CTA_TIMESTAMP_EVENT]	= { .type = NLA_REJECT },
};

static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
Loading