Commit fc14f9c0 authored by Jakub Kicinski
Browse files
Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) Fix racy non-atomic read-then-increment operation with
   PREEMPT_RT in nft_ct, from Sebastian Andrzej Siewior.

2) GC is not skipped when jiffies wrap around in nf_conncount,
   from Nicklas Bo Jensen.

3) flush_work() on nf_tables_destroy_work waits for the last queued
   instance, which could be a different instance from the one that we
   must wait for; fix this by making the destruction work queue pernet.

* tag 'nf-25-03-06' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nf_tables: make destruction work queue pernet
  netfilter: nf_conncount: garbage collection is not skipped when jiffies wrap around
  netfilter: nft_ct: Use __refcount_inc() for per-CPU nft_ct_pcpu_template.
====================

Link: https://patch.msgid.link/20250306153446.46712-1-pablo@netfilter.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 115ef44a fb828656
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -1891,7 +1891,7 @@ void nft_chain_filter_fini(void);
void __init nft_chain_route_init(void);
void nft_chain_route_fini(void);

void nf_tables_trans_destroy_flush_work(void);
void nf_tables_trans_destroy_flush_work(struct net *net);

int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result);
__be64 nf_jiffies64_to_msecs(u64 input);
@@ -1905,6 +1905,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re
struct nftables_pernet {
	struct list_head	tables;
	struct list_head	commit_list;
	struct list_head	destroy_list;
	struct list_head	commit_set_list;
	struct list_head	binding_list;
	struct list_head	module_list;
@@ -1915,6 +1916,7 @@ struct nftables_pernet {
	unsigned int		base_seq;
	unsigned int		gc_seq;
	u8			validate_state;
	struct work_struct	destroy_work;
};

extern unsigned int nf_tables_net_id;
+2 −2
Original line number Diff line number Diff line
@@ -132,7 +132,7 @@ static int __nf_conncount_add(struct net *net,
	struct nf_conn *found_ct;
	unsigned int collect = 0;

	if (time_is_after_eq_jiffies((unsigned long)list->last_gc))
	if ((u32)jiffies == list->last_gc)
		goto add_new_node;

	/* check the saved connections */
@@ -234,7 +234,7 @@ bool nf_conncount_gc_list(struct net *net,
	bool ret = false;

	/* don't bother if we just did GC */
	if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc)))
	if ((u32)jiffies == READ_ONCE(list->last_gc))
		return false;

	/* don't bother if other cpu is already doing GC */
+14 −10
Original line number Diff line number Diff line
@@ -34,7 +34,6 @@ unsigned int nf_tables_net_id __read_mostly;
static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static LIST_HEAD(nf_tables_destroy_list);
static LIST_HEAD(nf_tables_gc_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
@@ -125,7 +124,6 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
	table->validate_state = new_validate_state;
}
static void nf_tables_trans_destroy_work(struct work_struct *w);
static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);

static void nft_trans_gc_work(struct work_struct *work);
static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
@@ -10006,11 +10004,12 @@ static void nft_commit_release(struct nft_trans *trans)

static void nf_tables_trans_destroy_work(struct work_struct *w)
{
	struct nftables_pernet *nft_net = container_of(w, struct nftables_pernet, destroy_work);
	struct nft_trans *trans, *next;
	LIST_HEAD(head);

	spin_lock(&nf_tables_destroy_list_lock);
	list_splice_init(&nf_tables_destroy_list, &head);
	list_splice_init(&nft_net->destroy_list, &head);
	spin_unlock(&nf_tables_destroy_list_lock);

	if (list_empty(&head))
@@ -10024,9 +10023,11 @@ static void nf_tables_trans_destroy_work(struct work_struct *w)
	}
}

void nf_tables_trans_destroy_flush_work(void)
void nf_tables_trans_destroy_flush_work(struct net *net)
{
	flush_work(&trans_destroy_work);
	struct nftables_pernet *nft_net = nft_pernet(net);

	flush_work(&nft_net->destroy_work);
}
EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);

@@ -10484,11 +10485,11 @@ static void nf_tables_commit_release(struct net *net)

	trans->put_net = true;
	spin_lock(&nf_tables_destroy_list_lock);
	list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list);
	list_splice_tail_init(&nft_net->commit_list, &nft_net->destroy_list);
	spin_unlock(&nf_tables_destroy_list_lock);

	nf_tables_module_autoload_cleanup(net);
	schedule_work(&trans_destroy_work);
	schedule_work(&nft_net->destroy_work);

	mutex_unlock(&nft_net->commit_mutex);
}
@@ -11853,7 +11854,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,

	gc_seq = nft_gc_seq_begin(nft_net);

	nf_tables_trans_destroy_flush_work();
	nf_tables_trans_destroy_flush_work(net);
again:
	list_for_each_entry(table, &nft_net->tables, list) {
		if (nft_table_has_owner(table) &&
@@ -11895,6 +11896,7 @@ static int __net_init nf_tables_init_net(struct net *net)

	INIT_LIST_HEAD(&nft_net->tables);
	INIT_LIST_HEAD(&nft_net->commit_list);
	INIT_LIST_HEAD(&nft_net->destroy_list);
	INIT_LIST_HEAD(&nft_net->commit_set_list);
	INIT_LIST_HEAD(&nft_net->binding_list);
	INIT_LIST_HEAD(&nft_net->module_list);
@@ -11903,6 +11905,7 @@ static int __net_init nf_tables_init_net(struct net *net)
	nft_net->base_seq = 1;
	nft_net->gc_seq = 0;
	nft_net->validate_state = NFT_VALIDATE_SKIP;
	INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work);

	return 0;
}
@@ -11931,14 +11934,17 @@ static void __net_exit nf_tables_exit_net(struct net *net)
	if (!list_empty(&nft_net->module_list))
		nf_tables_module_autoload_cleanup(net);

	cancel_work_sync(&nft_net->destroy_work);
	__nft_release_tables(net);

	nft_gc_seq_end(nft_net, gc_seq);

	mutex_unlock(&nft_net->commit_mutex);

	WARN_ON_ONCE(!list_empty(&nft_net->tables));
	WARN_ON_ONCE(!list_empty(&nft_net->module_list));
	WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
	WARN_ON_ONCE(!list_empty(&nft_net->destroy_list));
}

static void nf_tables_exit_batch(struct list_head *net_exit_list)
@@ -12029,10 +12035,8 @@ static void __exit nf_tables_module_exit(void)
	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
	nft_chain_filter_fini();
	nft_chain_route_fini();
	nf_tables_trans_destroy_flush_work();
	unregister_pernet_subsys(&nf_tables_net_ops);
	cancel_work_sync(&trans_gc_work);
	cancel_work_sync(&trans_destroy_work);
	rcu_barrier();
	rhltable_destroy(&nft_objname_ht);
	nf_tables_core_module_exit();
+4 −4
Original line number Diff line number Diff line
@@ -228,7 +228,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
	return 0;
}

static void nft_compat_wait_for_destructors(void)
static void nft_compat_wait_for_destructors(struct net *net)
{
	/* xtables matches or targets can have side effects, e.g.
	 * creation/destruction of /proc files.
@@ -236,7 +236,7 @@ static void nft_compat_wait_for_destructors(void)
	 * work queue.  If we have pending invocations we thus
	 * need to wait for those to finish.
	 */
	nf_tables_trans_destroy_flush_work();
	nf_tables_trans_destroy_flush_work(net);
}

static int
@@ -262,7 +262,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,

	nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);

	nft_compat_wait_for_destructors();
	nft_compat_wait_for_destructors(ctx->net);

	ret = xt_check_target(&par, size, proto, inv);
	if (ret < 0) {
@@ -515,7 +515,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,

	nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);

	nft_compat_wait_for_destructors();
	nft_compat_wait_for_destructors(ctx->net);

	return xt_check_match(&par, size, proto, inv);
}
+4 −2
Original line number Diff line number Diff line
@@ -230,6 +230,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
	enum ip_conntrack_info ctinfo;
	u16 value = nft_reg_load16(&regs->data[priv->sreg]);
	struct nf_conn *ct;
	int oldcnt;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct) /* already tracked */
@@ -250,10 +251,11 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,

	ct = this_cpu_read(nft_ct_pcpu_template);

	if (likely(refcount_read(&ct->ct_general.use) == 1)) {
		refcount_inc(&ct->ct_general.use);
	__refcount_inc(&ct->ct_general.use, &oldcnt);
	if (likely(oldcnt == 1)) {
		nf_ct_zone_add(ct, &zone);
	} else {
		refcount_dec(&ct->ct_general.use);
		/* previous skb got queued to userspace, allocate temporary
		 * one until percpu template can be reused.
		 */