Merge tag 'nf-26-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Pablo Neira Ayuso says:

====================
Netfilter for net

This is v3, I kept back an ipset fix and another to tigthen the xtables
interface to reject invalid combinations with the NFPROTO_ARP family.
They need a bit more discussion. I fixed the issues reported by AI on
patch 9 (add #ifdef to access ct zone, update nf_conntrack_broadcast
and patch 10 (use better Fixes: tag). Thanks!

The following patchset contains Netfilter fixes for *net*.

Note that most bugs fixed here stem from 2.6 days, the large PR is not
due to an increase in regressions.

1) Fix incorrect reject of set updates with nf_tables pipapo set
   avx2 backend.  This comes with a regression test in patch 2.
   From Florian Westphal.

2) nfnetlink_log needs to zero padding to prevent infoleak to userspace,
   from Weiming Shi.

3) xtables ip6t_rt module never validated that addrnr length is within the
   allowed array boundary. Reject bogus values.  From Ren Wei.

4) Fix high memory usage in rbtree set backend that was unwanted side-effect
   of the recently added binary search blob. From Pablo Neira Ayuso.

5) Patches 5 to 10, also from Pablo, address long-standing RCU safety bugs
   in conntracks handling of expectations: We can never safely defer
   a conntrack extension area without holding a reference. Yet expectation
   handling does so in multiple places.  Fix this by avoiding the need to
   look into the master conntrack to begin with and by extending locked
   sections in a few places.

11) Fix use of uninitialized rtp_addr in the sip conntrack helper,
    also from Weiming Shi.

12) Add stricter netlink policy checks in ctnetlink, from David Carlier.
    This avoids undefined behaviour when userspace provides huge wscale
    value.

netfilter pull request 26-03-26

* tag 'nf-26-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: ctnetlink: use netlink policy range checks
  netfilter: nf_conntrack_sip: fix use of uninitialized rtp_addr in process_sdp
  netfilter: nf_conntrack_expect: skip expectations in other netns via proc
  netfilter: nf_conntrack_expect: store netns and zone in expectation
  netfilter: ctnetlink: ensure safe access to master conntrack
  netfilter: nf_conntrack_expect: use expect->helper
  netfilter: nf_conntrack_expect: honor expectation helper field
  netfilter: nft_set_rbtree: revisit array resize logic
  netfilter: ip6t_rt: reject oversized addrnr in rt_mt6_check()
  netfilter: nfnetlink_log: fix uninitialized padding leak in NFULA_PAYLOAD
  selftests: netfilter: nft_concat_range.sh: add check for flush+reload bug
  netfilter: nft_set_pipapo_avx2: don't return non-matching entry on expiry
====================

Link: https://patch.msgid.link/20260326125153.685915-1-pablo@netfilter.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni
2026-03-26 15:38:14 +01:00
16 changed files with 296 additions and 102 deletions

View File

@@ -83,6 +83,11 @@ void nf_conntrack_lock(spinlock_t *lock);
extern spinlock_t nf_conntrack_expect_lock;
static inline void lockdep_nfct_expect_lock_held(void)
{
lockdep_assert_held(&nf_conntrack_expect_lock);
}
/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)

View File

@@ -22,10 +22,16 @@ struct nf_conntrack_expect {
/* Hash member */
struct hlist_node hnode;
/* Network namespace */
possible_net_t net;
/* We expect this tuple, with the following mask */
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_mask mask;
#ifdef CONFIG_NF_CONNTRACK_ZONES
struct nf_conntrack_zone zone;
#endif
/* Usage count. */
refcount_t use;
@@ -40,7 +46,7 @@ struct nf_conntrack_expect {
struct nf_conntrack_expect *this);
/* Helper to assign to new connection */
struct nf_conntrack_helper *helper;
struct nf_conntrack_helper __rcu *helper;
/* The conntrack of the master connection */
struct nf_conn *master;
@@ -62,7 +68,17 @@ struct nf_conntrack_expect {
static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp)
{
return nf_ct_net(exp->master);
return read_pnet(&exp->net);
}
static inline bool nf_ct_exp_zone_equal_any(const struct nf_conntrack_expect *a,
const struct nf_conntrack_zone *b)
{
#ifdef CONFIG_NF_CONNTRACK_ZONES
return a->zone.id == b->id;
#else
return true;
#endif
}
#define NF_CT_EXP_POLICY_NAME_LEN 16

View File

@@ -159,5 +159,9 @@ enum ip_conntrack_expect_events {
#define NF_CT_EXPECT_INACTIVE 0x2
#define NF_CT_EXPECT_USERSPACE 0x4
#ifdef __KERNEL__
#define NF_CT_EXPECT_MASK (NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE | \
NF_CT_EXPECT_USERSPACE)
#endif
#endif /* _UAPI_NF_CONNTRACK_COMMON_H */

View File

@@ -157,6 +157,10 @@ static int rt_mt6_check(const struct xt_mtchk_param *par)
pr_debug("unknown flags %X\n", rtinfo->invflags);
return -EINVAL;
}
if (rtinfo->addrnr > IP6T_RT_HOPS) {
pr_debug("too many addresses specified\n");
return -EINVAL;
}
if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) &&
(!(rtinfo->flags & IP6T_RT_TYP) ||
(rtinfo->rt_type != 0) ||

View File

@@ -21,6 +21,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
unsigned int timeout)
{
const struct nf_conntrack_helper *helper;
struct net *net = read_pnet(&ct->ct_net);
struct nf_conntrack_expect *exp;
struct iphdr *iph = ip_hdr(skb);
struct rtable *rt = skb_rtable(skb);
@@ -70,8 +71,11 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
exp->expectfn = NULL;
exp->flags = NF_CT_EXPECT_PERMANENT;
exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
exp->helper = NULL;
rcu_assign_pointer(exp->helper, helper);
write_pnet(&exp->net, net);
#ifdef CONFIG_NF_CONNTRACK_ZONES
exp->zone = ct->zone;
#endif
nf_ct_expect_related(exp, 0);
nf_ct_expect_put(exp);

View File

@@ -247,6 +247,8 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
lockdep_nfct_expect_lock_held();
rcu_read_lock();
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (!notify)

View File

@@ -51,6 +51,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
struct net *net = nf_ct_exp_net(exp);
struct nf_conntrack_net *cnet;
lockdep_nfct_expect_lock_held();
WARN_ON(!master_help);
WARN_ON(timer_pending(&exp->timeout));
@@ -112,12 +113,14 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
const struct net *net)
{
return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
net_eq(net, nf_ct_net(i->master)) &&
nf_ct_zone_equal_any(i->master, zone);
net_eq(net, read_pnet(&i->net)) &&
nf_ct_exp_zone_equal_any(i, zone);
}
bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
{
lockdep_nfct_expect_lock_held();
if (timer_delete(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
@@ -177,6 +180,8 @@ nf_ct_find_expectation(struct net *net,
struct nf_conntrack_expect *i, *exp = NULL;
unsigned int h;
lockdep_nfct_expect_lock_held();
if (!cnet->expect_count)
return NULL;
@@ -309,12 +314,20 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
/* This function can only be used from packet path, where accessing
* master's helper is safe, because the packet holds a reference on
* the conntrack object. Never use it from control plane.
*/
void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
u_int8_t family,
const union nf_inet_addr *saddr,
const union nf_inet_addr *daddr,
u_int8_t proto, const __be16 *src, const __be16 *dst)
{
struct nf_conntrack_helper *helper = NULL;
struct nf_conn *ct = exp->master;
struct net *net = read_pnet(&ct->ct_net);
struct nf_conn_help *help;
int len;
if (family == AF_INET)
@@ -325,7 +338,16 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
exp->flags = 0;
exp->class = class;
exp->expectfn = NULL;
exp->helper = NULL;
help = nfct_help(ct);
if (help)
helper = rcu_dereference(help->helper);
rcu_assign_pointer(exp->helper, helper);
write_pnet(&exp->net, net);
#ifdef CONFIG_NF_CONNTRACK_ZONES
exp->zone = ct->zone;
#endif
exp->tuple.src.l3num = family;
exp->tuple.dst.protonum = proto;
@@ -442,6 +464,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
unsigned int h;
int ret = 0;
lockdep_nfct_expect_lock_held();
if (!master_help) {
ret = -ESHUTDOWN;
goto out;
@@ -498,8 +522,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
nf_ct_expect_insert(expect);
spin_unlock_bh(&nf_conntrack_expect_lock);
nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
spin_unlock_bh(&nf_conntrack_expect_lock);
return 0;
out:
spin_unlock_bh(&nf_conntrack_expect_lock);
@@ -627,11 +652,15 @@ static int exp_seq_show(struct seq_file *s, void *v)
{
struct nf_conntrack_expect *expect;
struct nf_conntrack_helper *helper;
struct net *net = seq_file_net(s);
struct hlist_node *n = v;
char *delim = "";
expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
if (!net_eq(nf_ct_exp_net(expect), net))
return 0;
if (expect->timeout.function)
seq_printf(s, "%ld ", timer_pending(&expect->timeout)
? (long)(expect->timeout.expires - jiffies)/HZ : 0);
@@ -654,7 +683,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
if (expect->flags & NF_CT_EXPECT_USERSPACE)
seq_printf(s, "%sUSERSPACE", delim);
helper = rcu_dereference(nfct_help(expect->master)->helper);
helper = rcu_dereference(expect->helper);
if (helper) {
seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
if (helper->expect_policy[expect->class].name[0])

View File

@@ -643,7 +643,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3,
IPPROTO_TCP, NULL, &port);
exp->helper = &nf_conntrack_helper_h245;
rcu_assign_pointer(exp->helper, &nf_conntrack_helper_h245);
nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -767,7 +767,7 @@ static int expect_callforwarding(struct sk_buff *skb,
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
exp->helper = nf_conntrack_helper_q931;
rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -1234,7 +1234,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3 : NULL,
&ct->tuplehash[!dir].tuple.dst.u3,
IPPROTO_TCP, NULL, &port);
exp->helper = nf_conntrack_helper_q931;
rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */
nathook = rcu_dereference(nfct_h323_nat_hook);
@@ -1306,7 +1306,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_UDP, NULL, &port);
exp->helper = nf_conntrack_helper_ras;
rcu_assign_pointer(exp->helper, nf_conntrack_helper_ras);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect RAS ");
@@ -1523,7 +1523,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
exp->flags = NF_CT_EXPECT_PERMANENT;
exp->helper = nf_conntrack_helper_q931;
rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
@@ -1577,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
exp->flags = NF_CT_EXPECT_PERMANENT;
exp->helper = nf_conntrack_helper_q931;
rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");

View File

@@ -395,14 +395,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
{
struct nf_conn_help *help = nfct_help(exp->master);
const struct nf_conntrack_helper *me = data;
const struct nf_conntrack_helper *this;
if (exp->helper == me)
return true;
this = rcu_dereference_protected(help->helper,
this = rcu_dereference_protected(exp->helper,
lockdep_is_held(&nf_conntrack_expect_lock));
return this == me;
}
@@ -421,6 +417,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
nf_ct_iterate_destroy(unhelp, me);
/* nf_ct_iterate_destroy() does an unconditional synchronize_rcu() as
* last step, this ensures rcu readers of exp->helper are done.
* No need for another synchronize_rcu() here.
*/
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);

View File

@@ -910,8 +910,8 @@ struct ctnetlink_filter {
};
static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = {
[CTA_FILTER_ORIG_FLAGS] = { .type = NLA_U32 },
[CTA_FILTER_REPLY_FLAGS] = { .type = NLA_U32 },
[CTA_FILTER_ORIG_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL),
[CTA_FILTER_REPLY_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL),
};
static int ctnetlink_parse_filter(const struct nlattr *attr,
@@ -925,17 +925,11 @@ static int ctnetlink_parse_filter(const struct nlattr *attr,
if (ret)
return ret;
if (tb[CTA_FILTER_ORIG_FLAGS]) {
if (tb[CTA_FILTER_ORIG_FLAGS])
filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]);
if (filter->orig_flags & ~CTA_FILTER_F_ALL)
return -EOPNOTSUPP;
}
if (tb[CTA_FILTER_REPLY_FLAGS]) {
if (tb[CTA_FILTER_REPLY_FLAGS])
filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]);
if (filter->reply_flags & ~CTA_FILTER_F_ALL)
return -EOPNOTSUPP;
}
return 0;
}
@@ -2634,7 +2628,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
[CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN - 1 },
[CTA_EXPECT_ZONE] = { .type = NLA_U16 },
[CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
[CTA_EXPECT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_CT_EXPECT_MASK),
[CTA_EXPECT_CLASS] = { .type = NLA_U32 },
[CTA_EXPECT_NAT] = { .type = NLA_NESTED },
[CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
@@ -3012,7 +3006,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
{
struct nf_conn *master = exp->master;
long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ;
struct nf_conn_help *help;
struct nf_conntrack_helper *helper;
#if IS_ENABLED(CONFIG_NF_NAT)
struct nlattr *nest_parms;
struct nf_conntrack_tuple nat_tuple = {};
@@ -3057,15 +3051,12 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) ||
nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class)))
goto nla_put_failure;
help = nfct_help(master);
if (help) {
struct nf_conntrack_helper *helper;
helper = rcu_dereference(help->helper);
if (helper &&
nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name))
goto nla_put_failure;
}
helper = rcu_dereference(exp->helper);
if (helper &&
nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name))
goto nla_put_failure;
expfn = nf_ct_helper_expectfn_find_by_symbol(exp->expectfn);
if (expfn != NULL &&
nla_put_string(skb, CTA_EXPECT_FN, expfn->name))
@@ -3358,31 +3349,37 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
if (err < 0)
return err;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!skb2)
return -ENOMEM;
spin_lock_bh(&nf_conntrack_expect_lock);
exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
if (!exp)
if (!exp) {
spin_unlock_bh(&nf_conntrack_expect_lock);
kfree_skb(skb2);
return -ENOENT;
}
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
spin_unlock_bh(&nf_conntrack_expect_lock);
kfree_skb(skb2);
return -ENOENT;
}
}
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!skb2) {
nf_ct_expect_put(exp);
return -ENOMEM;
}
rcu_read_lock();
err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid,
info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
exp);
rcu_read_unlock();
nf_ct_expect_put(exp);
spin_unlock_bh(&nf_conntrack_expect_lock);
if (err <= 0) {
kfree_skb(skb2);
return -ENOMEM;
@@ -3394,12 +3391,9 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
{
struct nf_conntrack_helper *helper;
const struct nf_conn_help *m_help;
const char *name = data;
m_help = nfct_help(exp->master);
helper = rcu_dereference(m_help->helper);
helper = rcu_dereference(exp->helper);
if (!helper)
return false;
@@ -3432,22 +3426,26 @@ static int ctnetlink_del_expect(struct sk_buff *skb,
if (err < 0)
return err;
spin_lock_bh(&nf_conntrack_expect_lock);
/* bump usage count to 2 */
exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
if (!exp)
if (!exp) {
spin_unlock_bh(&nf_conntrack_expect_lock);
return -ENOENT;
}
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
spin_unlock_bh(&nf_conntrack_expect_lock);
return -ENOENT;
}
}
/* after list removal, usage count == 1 */
spin_lock_bh(&nf_conntrack_expect_lock);
if (timer_delete(&exp->timeout)) {
nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
nlmsg_report(info->nlh));
@@ -3534,9 +3532,10 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask)
{
u_int32_t class = 0;
struct net *net = read_pnet(&ct->ct_net);
struct nf_conntrack_expect *exp;
struct nf_conn_help *help;
u32 class = 0;
int err;
help = nfct_help(ct);
@@ -3573,7 +3572,13 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
exp->class = class;
exp->master = ct;
exp->helper = helper;
write_pnet(&exp->net, net);
#ifdef CONFIG_NF_CONNTRACK_ZONES
exp->zone = ct->zone;
#endif
if (!helper)
helper = rcu_dereference(help->helper);
rcu_assign_pointer(exp->helper, helper);
exp->tuple = *tuple;
exp->mask.src.u3 = mask->src.u3;
exp->mask.src.u.all = mask->src.u.all;

View File

@@ -1385,9 +1385,9 @@ nla_put_failure:
}
static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
[CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
[CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
[CTA_PROTOINFO_TCP_STATE] = NLA_POLICY_MAX(NLA_U8, TCP_CONNTRACK_SYN_SENT2),
[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE),
[CTA_PROTOINFO_TCP_WSCALE_REPLY] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE),
[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
[CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
};
@@ -1414,10 +1414,6 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
if (err < 0)
return err;
if (tb[CTA_PROTOINFO_TCP_STATE] &&
nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
return -EINVAL;
spin_lock_bh(&ct->lock);
if (tb[CTA_PROTOINFO_TCP_STATE])
ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);

View File

@@ -924,7 +924,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
if (!exp || exp->master == ct ||
nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
exp->helper != nfct_help(ct)->helper ||
exp->class != class)
break;
#if IS_ENABLED(CONFIG_NF_NAT)
@@ -1040,6 +1040,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
unsigned int port;
const struct sdp_media_type *t;
int ret = NF_ACCEPT;
bool have_rtp_addr = false;
hooks = rcu_dereference(nf_nat_sip_hooks);
@@ -1056,8 +1057,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
caddr_len = 0;
if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen,
SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
&matchoff, &matchlen, &caddr) > 0)
&matchoff, &matchlen, &caddr) > 0) {
caddr_len = matchlen;
memcpy(&rtp_addr, &caddr, sizeof(rtp_addr));
have_rtp_addr = true;
}
mediaoff = sdpoff;
for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) {
@@ -1091,9 +1095,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
&matchoff, &matchlen, &maddr) > 0) {
maddr_len = matchlen;
memcpy(&rtp_addr, &maddr, sizeof(rtp_addr));
} else if (caddr_len)
have_rtp_addr = true;
} else if (caddr_len) {
memcpy(&rtp_addr, &caddr, sizeof(rtp_addr));
else {
have_rtp_addr = true;
} else {
nf_ct_helper_log(skb, ct, "cannot parse SDP message");
return NF_DROP;
}
@@ -1125,7 +1131,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
/* Update session connection and owner addresses */
hooks = rcu_dereference(nf_nat_sip_hooks);
if (hooks && ct->status & IPS_NAT_MASK)
if (hooks && ct->status & IPS_NAT_MASK && have_rtp_addr)
ret = hooks->sdp_session(skb, protoff, dataoff,
dptr, datalen, sdpoff,
&rtp_addr);
@@ -1297,7 +1303,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct),
saddr, &daddr, proto, NULL, &port);
exp->timeout.expires = sip_timeout * HZ;
exp->helper = helper;
rcu_assign_pointer(exp->helper, helper);
exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
hooks = rcu_dereference(nf_nat_sip_hooks);

View File

@@ -647,15 +647,11 @@ __build_packet_message(struct nfnl_log_net *log,
if (data_len) {
struct nlattr *nla;
int size = nla_attr_size(data_len);
if (skb_tailroom(inst->skb) < nla_total_size(data_len))
nla = nla_reserve(inst->skb, NFULA_PAYLOAD, data_len);
if (!nla)
goto nla_put_failure;
nla = skb_put(inst->skb, nla_total_size(data_len));
nla->nla_type = NFULA_PAYLOAD;
nla->nla_len = size;
if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
BUG();
}

View File

@@ -242,7 +242,7 @@ static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
return b;
ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -319,7 +319,7 @@ static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
return b;
ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -414,7 +414,7 @@ static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
return b;
ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -505,7 +505,7 @@ static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
return b;
ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -641,7 +641,7 @@ static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
return b;
ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -699,7 +699,7 @@ static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
return b;
ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -764,7 +764,7 @@ static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
return b;
ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -839,7 +839,7 @@ static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
return b;
ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -925,7 +925,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
return b;
ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;
@@ -1019,7 +1019,7 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
if (last)
return b;
ret = b;
if (unlikely(ret == -1))
ret = b / XSAVE_YMM_SIZE;

View File

@@ -572,14 +572,12 @@ static struct nft_array *nft_array_alloc(u32 max_intervals)
return array;
}
#define NFT_ARRAY_EXTRA_SIZE 10240
/* Similar to nft_rbtree_{u,k}size to hide details to userspace, but consider
* packed representation coming from userspace for anonymous sets too.
*/
static u32 nft_array_elems(const struct nft_set *set)
{
u32 nelems = atomic_read(&set->nelems);
u32 nelems = atomic_read(&set->nelems) - set->ndeact;
/* Adjacent intervals are represented with a single start element in
* anonymous sets, use the current element counter as is.
@@ -595,27 +593,87 @@ static u32 nft_array_elems(const struct nft_set *set)
return (nelems / 2) + 2;
}
static int nft_array_may_resize(const struct nft_set *set)
#define NFT_ARRAY_INITIAL_SIZE 1024
#define NFT_ARRAY_INITIAL_ANON_SIZE 16
#define NFT_ARRAY_INITIAL_ANON_THRESH (8192U / sizeof(struct nft_array_interval))
static int nft_array_may_resize(const struct nft_set *set, bool flush)
{
u32 nelems = nft_array_elems(set), new_max_intervals;
u32 initial_intervals, max_intervals, new_max_intervals, delta;
u32 shrinked_max_intervals, nelems = nft_array_elems(set);
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_array *array;
if (!priv->array_next) {
array = nft_array_alloc(nelems + NFT_ARRAY_EXTRA_SIZE);
if (nft_set_is_anonymous(set))
initial_intervals = NFT_ARRAY_INITIAL_ANON_SIZE;
else
initial_intervals = NFT_ARRAY_INITIAL_SIZE;
if (priv->array_next) {
max_intervals = priv->array_next->max_intervals;
new_max_intervals = priv->array_next->max_intervals;
} else {
if (priv->array) {
max_intervals = priv->array->max_intervals;
new_max_intervals = priv->array->max_intervals;
} else {
max_intervals = 0;
new_max_intervals = initial_intervals;
}
}
if (nft_set_is_anonymous(set))
goto maybe_grow;
if (flush) {
/* Set flush just started, nelems still report elements.*/
nelems = 0;
new_max_intervals = NFT_ARRAY_INITIAL_SIZE;
goto realloc_array;
}
if (check_add_overflow(new_max_intervals, new_max_intervals,
&shrinked_max_intervals))
return -EOVERFLOW;
shrinked_max_intervals = DIV_ROUND_UP(shrinked_max_intervals, 3);
if (shrinked_max_intervals > NFT_ARRAY_INITIAL_SIZE &&
nelems < shrinked_max_intervals) {
new_max_intervals = shrinked_max_intervals;
goto realloc_array;
}
maybe_grow:
if (nelems > new_max_intervals) {
if (nft_set_is_anonymous(set) &&
new_max_intervals < NFT_ARRAY_INITIAL_ANON_THRESH) {
new_max_intervals <<= 1;
} else {
delta = new_max_intervals >> 1;
if (check_add_overflow(new_max_intervals, delta,
&new_max_intervals))
return -EOVERFLOW;
}
}
realloc_array:
if (WARN_ON_ONCE(nelems > new_max_intervals))
return -ENOMEM;
if (priv->array_next) {
if (max_intervals == new_max_intervals)
return 0;
if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0)
return -ENOMEM;
} else {
array = nft_array_alloc(new_max_intervals);
if (!array)
return -ENOMEM;
priv->array_next = array;
}
if (nelems < priv->array_next->max_intervals)
return 0;
new_max_intervals = priv->array_next->max_intervals + NFT_ARRAY_EXTRA_SIZE;
if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0)
return -ENOMEM;
return 0;
}
@@ -630,7 +688,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
nft_rbtree_maybe_reset_start_cookie(priv, tstamp);
if (nft_array_may_resize(set) < 0)
if (nft_array_may_resize(set, false) < 0)
return -ENOMEM;
do {
@@ -741,7 +799,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
nft_rbtree_interval_null(set, this))
priv->start_rbe_cookie = 0;
if (nft_array_may_resize(set) < 0)
if (nft_array_may_resize(set, false) < 0)
return NULL;
while (parent != NULL) {
@@ -811,7 +869,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
switch (iter->type) {
case NFT_ITER_UPDATE_CLONE:
if (nft_array_may_resize(set) < 0) {
if (nft_array_may_resize(set, true) < 0) {
iter->err = -ENOMEM;
break;
}

View File

@@ -29,7 +29,8 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate insert_overlap"
BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate
insert_overlap load_flush_load4 load_flush_load8"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
@@ -432,6 +433,30 @@ race_repeat 0
perf_duration 0
"
TYPE_load_flush_load4="
display reload with flush, 4bit groups
type_spec ipv4_addr . ipv4_addr
chain_spec ip saddr . ip daddr
dst addr4
proto icmp
race_repeat 0
perf_duration 0
"
TYPE_load_flush_load8="
display reload with flush, 8bit groups
type_spec ipv4_addr . ipv4_addr
chain_spec ip saddr . ip daddr
dst addr4
proto icmp
race_repeat 0
perf_duration 0
"
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -1997,6 +2022,49 @@ test_bug_insert_overlap()
return 0
}
test_bug_load_flush_load4()
{
local i
setup veth send_"${proto}" set || return ${ksft_skip}
for i in $(seq 0 255); do
local addelem="add element inet filter test"
local j
for j in $(seq 0 20); do
echo "$addelem { 10.$j.0.$i . 10.$j.1.$i }"
echo "$addelem { 10.$j.0.$i . 10.$j.2.$i }"
done
done > "$tmp"
nft -f "$tmp" || return 1
( echo "flush set inet filter test";cat "$tmp") | nft -f -
[ $? -eq 0 ] || return 1
return 0
}
test_bug_load_flush_load8()
{
local i
setup veth send_"${proto}" set || return ${ksft_skip}
for i in $(seq 1 100); do
echo "add element inet filter test { 10.0.0.$i . 10.0.1.$i }"
echo "add element inet filter test { 10.0.0.$i . 10.0.2.$i }"
done > "$tmp"
nft -f "$tmp" || return 1
( echo "flush set inet filter test";cat "$tmp") | nft -f -
[ $? -eq 0 ] || return 1
return 0
}
test_reported_issues() {
eval test_bug_"${subtest}"
}