netfilter: ctnetlink: ensure safe access to master conntrack

Holding reference on the expectation is not sufficient, the master
conntrack object can just go away, making exp->master invalid.

To access exp->master safely:

- Grab the nf_conntrack_expect_lock, this gets serialized with
  clean_from_lists() which also holds this lock when the master
  conntrack goes away.

- Hold reference on master conntrack via nf_conntrack_find_get().
  Not so easy since the master tuple to look up for the master conntrack
  is not available in the existing problematic paths.

This patch goes for extending the nf_conntrack_expect_lock section
to address this issue for simplicity, in the cases that are described
below this is just slightly extending the lock section.

The add expectation command already holds a reference to the master
conntrack from ctnetlink_create_expect().

However, the delete expectation command needs to grab the spinlock
before looking up for the expectation. Expand the existing spinlock
section to address this to cover the expectation lookup. Note that,
the nf_ct_expect_iterate_net() calls already grabs the spinlock while
iterating over the expectation table, which is correct.

The get expectation command needs to grab the spinlock to ensure master
conntrack does not go away. This also expands the existing spinlock
section to cover the expectation lookup too. I needed to move the
netlink skb allocation out of the spinlock to keep it GFP_KERNEL.

For the expectation events, the IPEXP_DESTROY event is already delivered
under the spinlock, just move the delivery of IPEXP_NEW under the
spinlock too because the master conntrack event cache is reached through
exp->master.

While at it, add lockdep notations to help identify what codepaths need
to grab the spinlock.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
Pablo Neira Ayuso
2026-03-25 14:11:04 +01:00
parent f017941060
commit bffcaad9af
4 changed files with 35 additions and 10 deletions

View File

@@ -83,6 +83,11 @@ void nf_conntrack_lock(spinlock_t *lock);
extern spinlock_t nf_conntrack_expect_lock;
static inline void lockdep_nfct_expect_lock_held(void)
{
lockdep_assert_held(&nf_conntrack_expect_lock);
}
/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)

View File

@@ -247,6 +247,8 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
lockdep_nfct_expect_lock_held();
rcu_read_lock();
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (!notify)

View File

@@ -51,6 +51,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
struct net *net = nf_ct_exp_net(exp);
struct nf_conntrack_net *cnet;
lockdep_nfct_expect_lock_held();
WARN_ON(!master_help);
WARN_ON(timer_pending(&exp->timeout));
@@ -118,6 +119,8 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
{
lockdep_nfct_expect_lock_held();
if (timer_delete(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
@@ -177,6 +180,8 @@ nf_ct_find_expectation(struct net *net,
struct nf_conntrack_expect *i, *exp = NULL;
unsigned int h;
lockdep_nfct_expect_lock_held();
if (!cnet->expect_count)
return NULL;
@@ -454,6 +459,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
unsigned int h;
int ret = 0;
lockdep_nfct_expect_lock_held();
if (!master_help) {
ret = -ESHUTDOWN;
goto out;
@@ -510,8 +517,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
nf_ct_expect_insert(expect);
spin_unlock_bh(&nf_conntrack_expect_lock);
nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
spin_unlock_bh(&nf_conntrack_expect_lock);
return 0;
out:
spin_unlock_bh(&nf_conntrack_expect_lock);

View File

@@ -3355,31 +3355,37 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
if (err < 0)
return err;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!skb2)
return -ENOMEM;
spin_lock_bh(&nf_conntrack_expect_lock);
exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
if (!exp)
if (!exp) {
spin_unlock_bh(&nf_conntrack_expect_lock);
kfree_skb(skb2);
return -ENOENT;
}
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
spin_unlock_bh(&nf_conntrack_expect_lock);
kfree_skb(skb2);
return -ENOENT;
}
}
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!skb2) {
nf_ct_expect_put(exp);
return -ENOMEM;
}
rcu_read_lock();
err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid,
info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
exp);
rcu_read_unlock();
nf_ct_expect_put(exp);
spin_unlock_bh(&nf_conntrack_expect_lock);
if (err <= 0) {
kfree_skb(skb2);
return -ENOMEM;
@@ -3426,22 +3432,26 @@ static int ctnetlink_del_expect(struct sk_buff *skb,
if (err < 0)
return err;
spin_lock_bh(&nf_conntrack_expect_lock);
/* bump usage count to 2 */
exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
if (!exp)
if (!exp) {
spin_unlock_bh(&nf_conntrack_expect_lock);
return -ENOENT;
}
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
spin_unlock_bh(&nf_conntrack_expect_lock);
return -ENOENT;
}
}
/* after list removal, usage count == 1 */
spin_lock_bh(&nf_conntrack_expect_lock);
if (timer_delete(&exp->timeout)) {
nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
nlmsg_report(info->nlh));